Date: (Sun) Aug 02, 2015

Introduction:

Data: Source: Training: https://inclass.kaggle.com/c/15-071x-the-analytics-edge-summer-2015/download/eBayiPadTrain.csv
New: https://inclass.kaggle.com/c/15-071x-the-analytics-edge-summer-2015/download/eBayiPadTest.csv
Time period:

Synopsis:

Based on analysis utilizing <> techniques, :

Regression results: First run: : OOB_RMSE=<0.4f>; new_RMSE=<0.4f>; =; =

Classification results: template: prdline.my == “Unknown” -> 296 Low.cor.X.glm: Leaderboard: 0.83458 newobs_tbl=[N=471, Y=327]; submit_filename=template_Final_glm_submit.csv OOB_conf_mtrx=[YN=125, NY=76]=201; max.Accuracy.OOB=0.7710; opt.prob.threshold.OOB=0.6 startprice=100.00; biddable=95.42; productline=49.22; D.T.like=29.75; D.T.use=26.32; D.T.box=21.53;

prdline: -> Worse than template prdline.my == “Unknown” -> 285 All.X.no.rnorm.rf: Leaderboard: 0.82649 newobs_tbl=[N=485, Y=313]; submit_filename=prdline_Final_rf_submit.csv OOB_conf_mtrx=[YN=119, NY=80]=199; max.Accuracy.OOB=0.8339; opt.prob.threshold.OOB=0.5 startprice=100.00; biddable=84.25; D.sum.TfIdf=7.28; D.T.use=4.26; D.T.veri=2.78; D.T.scratch=1.99; D.T.box=; D.T.like=; Low.cor.X.glm: Leaderboard: 0.81234 newobs_tbl=[N=471, Y=327]; submit_filename=prdline_Low_cor_X_glm_submit.csv OOB_conf_mtrx=[YN=125, NY=74]=199; max.Accuracy.OOB=0.8205; opt.prob.threshold.OOB=0.6 startprice=100.00; biddable=96.07; prdline.my=51.37; D.T.like=29.39; D.T.use=25.43; D.T.box=22.27; D.T.veri=; D.T.scratch=;

oobssmpl: -> Low.cor.X.glm: Leaderboard: 0.83402 newobs_tbl=[N=440, Y=358]; submit_filename=oobsmpl_Final_glm_submit OOB_conf_mtrx=[YN=114, NY=84]=198; max.Accuracy.OOB=0.7780; opt.prob.threshold.OOB=0.5 startprice=100.00; biddable=93.87; prdline.my=60.48; D.sum.TfIdf=; D.T.condition=8.69; D.T.screen=7.96; D.T.use=7.50; D.T.veri=; D.T.scratch=;

category: -> Low.cor.X.glm: Leaderboard: 0.82381 newobs_tbl=[N=470, Y=328]; submit_filename=category_Final_glm_submit OOB_conf_mtrx=[YN=119, NY=57]=176; max.Accuracy.OOB=0.8011; opt.prob.threshold.OOB=0.6 startprice=100.00; biddable=79.19; prdline.my=55.22; D.sum.TfIdf=; D.T.ipad=27.05; D.T.like=21.44; D.T.box=20.67; D.T.condition=; D.T.screen=;

dataclns: -> All.X.no.rnorm.rf: Leaderboard: 0.82211 newobs_tbl=[N=485, Y=313]; submit_filename=dataclns_Final_rf_submit OOB_conf_mtrx=[YN=104, NY=75]=179; max.Accuracy.OOB=0.7977; opt.prob.threshold.OOB=0.5 startprice.log=100.00; biddable=65.85; prdline.my=7.74; D.sum.TfIdf=; D.T.use=2.01; D.T.condition=1.87; D.T.veri=1.62; D.T.ipad=; D.T.like=; Low.cor.X.glm: Leaderboard: 0.79264 newobs_tbl=[N=460, Y=338]; submit_filename=dataclns_Low_cor_X_glm_submit OOB_conf_mtrx=[YN=113, NY=74]=187; max.Accuracy.OOB=0.7977; opt.prob.threshold.OOB=0.5 -> different from prev run of 0.6 biddable=100.00; startprice.log=91.85; prdline.my=38.34; D.sum.TfIdf=; D.T.ipad=29.92; D.T.box=27.76; D.T.work=25.79; D.T.use=; D.T.condition=;

txtterms: -> top_n = c(10) Low.cor.X.glm: Leaderboard: 0.81448 newobs_tbl=[N=442, Y=356]; submit_filename=txtterms_Final_glm_submit OOB_conf_mtrx=[YN=113, NY=69]=182; max.Accuracy.OOB=0.7943; opt.prob.threshold.OOB=0.5 biddable=100.00; startprice.log=90.11; prdline.my=37.65; D.sum.TfIdf=; D.T.ipad=28.67; D.T.work=24.90; D.T.great=21.44; # [1] “D.T.condit” “D.T.condition” “D.T.good” “D.T.ipad” “D.T.new”
# [6] “D.T.scratch” “D.T.screen” “D.T.this” “D.T.use” “D.T.work”

All.X.glm: Leaderboard: 0.81016
    newobs_tbl=[N=445, Y=353]; submit_filename=txtterms_Final_glm_submit
    OOB_conf_mtrx=[YN=108, NY=72]=180; max.Accuracy.OOB=0.7966;
        opt.prob.threshold.OOB=0.5
        biddable=100.00; startprice.log=88.24; prdline.my=33.81; D.sum.TfIdf=; 
        D.T.scratch=25.51; D.T.use=18.97; D.T.good=16.37; 

[1] “D.T.condit” “D.T.use” “D.T.scratch” “D.T.new” “D.T.good” “D.T.screen” [7] “D.T.great” “D.T.excel” “D.T.work” “D.T.ipad”

Max.cor.Y.rpart: Leaderboard: 0.79258
    newobs_tbl=[N=439, Y=359]; submit_filename=txtterms_Final_rpart_submit
    OOB_conf_mtrx=[YN=105, NY=76]=181; max.Accuracy.OOB=0.7954802;
        opt.prob.threshold.OOB=0.5
        startprice.log=100; biddable=; prdline.my=; D.sum.TfIdf=; 
        D.T.scratch=; D.T.use=; D.T.good=; 

[1] “D.T.condit” “D.T.use” “D.T.scratch” “D.T.new” “D.T.good” “D.T.screen” [7] “D.T.ipad” “D.T.great” “D.T.work” “D.T.excel”

All.X.no.rnorm.rf: Leaderboard: 0.80929
    newobs_tbl=[N=545, Y=253]; submit_filename=txtterms_Final_rf_submit
    OOB_conf_mtrx=[YN=108, NY=61]=169; max.Accuracy.OOB=0.8090395
        opt.prob.threshold.OOB=0.5
        startprice.log=100.00; biddable=78.82; idseq.my=63.43; prdline.my=45.57;
        D.T.use=2.76; D.T.condit=2.35; D.T.scratch=2.00; D.T.good=; 

[1] “D.T.condit” “D.T.use” “D.T.scratch” “D.T.new” “D.T.good” “D.T.screen” [7] “D.T.ipad” “D.T.great” “D.T.work” “D.T.excel”

txtclstr: All.X.no.rnorm.rf: Leaderboard: 0.79363 -> 0.79573 newobs_tbl=[N=537, Y=261]; submit_filename=txtclstr_Final_rf_submit OOB_conf_mtrx=[YN=104, NY=61]=165; max.Accuracy.OOB=0.8135593 opt.prob.threshold.OOB=0.5 startprice.log=100.00; biddable=79.99; idseq.my=64.94; prdline.my=4.14; prdline.my.clusterid=1.15; [1] “D.T.condit” “D.T.use” “D.T.scratch” “D.T.new” “D.T.good” “D.T.screen” [7] “D.T.ipad” “D.T.great” “D.T.work” “D.T.excel”

dupobs: All.X.no.rnorm.rf: Leaderboard: 0.79295 newobs_tbl=[N=541, Y=257]; submit_filename=dupobs_Final_rf_submit OOB_conf_mtrx=[YN=114, NY=65]=179; max.Accuracy.OOB=0.7977401 opt.prob.threshold.OOB=0.5 startprice.log=100.00; biddable=94.49; idseq.my=67.40; prdline.my=4.48; prdline.my.clusterid=1.99; [1] “D.T.condit” “D.T.use” “D.T.scratch” “D.T.new” “D.T.good” “D.T.screen” [7] “D.T.ipad” “D.T.great” “D.T.work” “D.T.excel”

All.X.no.rnorm.rf: Leaderboard: 0.79652
    newobs_tbl=[N=523, Y=275]; submit_filename=dupobs_Final_rf_submit
    OOB_conf_mtrx=[YN=114, NY=65]=179; max.Accuracy.OOB=0.7977401
        opt.prob.threshold.OOB=0.5
        startprice.log=100.00; biddable=94.24; idseq.my=67.92; 
            prdline.my=4.33; prdline.my.clusterid=2.17; 

[1] “D.T.condit” “D.T.use” “D.T.scratch” “D.T.new” “D.T.good” “D.T.screen” [7] “D.T.ipad” “D.T.great” “D.T.work” “D.T.excel”

csmmdl: All.X.no.rnorm.rf: Leaderboard: 0.79396 newobs_tbl=[N=525, Y=273]; submit_filename=csmmdl_Final_rf_submit OOB_conf_mtrx=[YN=111, NY=66]=177; max.Accuracy.OOB=0.8000000 opt.prob.threshold.OOB=0.5 startprice.log=100.00; biddable=90.30; idseq.my=67.06; prdline.my=4.40; cellular.fctr=3.57; prdline.my.clusterid=2.08;

All.Interact.X.no.rnorm.rf: Leaderboard: 0.77867 newobs_tbl=[N=564, Y=234]; submit_filename=csmmdl_Final_rf_submit OOB_conf_mtrx=[YN=120, NY=53]=173; max.Accuracy.OOB=0.8045198 opt.prob.threshold.OOB=0.5 biddable=100.00; startprice.log=93.99; idseq.my=57.30; prdline.my=9.09; cellular.fctr=3.30; prdline.my.clusterid=2.35;

All.Interact.X.no.rnorm.rf: Leaderboard: 0.77152 newobs_tbl=[N=539, Y=259]; submit_filename=csmmdl_Final_rf_submit OOB_conf_mtrx=[YN=, NY=]=; max.Accuracy.OOB=0.8011299 opt.prob.threshold.OOB=0.5 biddable=100.00; startprice.log=94.93; idseq.my=57.12; prdline.my=9.29; cellular.fctr=3.20; prdline.my.clusterid=2.50; [1] “D.T.condit” “D.T.use” “D.T.scratch” “D.T.new” “D.T.good” “D.T.screen” [7] “D.T.ipad” “D.T.great” “D.T.work” “D.T.excel”

    All.X.glmnet: 
        fit_RMSE=???; OOB_RMSE=115.1247; new_RMSE=115.1247; 
        prdline.my.fctr=100.00; condition.fctrNew=88.53; D.npnct09.log=84.34
            biddable=16.48; idseq.my=57.27;

spdiff:
All.Interact.X.no.rnorm.rf: Leaderboard: 0.78218 newobs_tbl=[N=517, Y=281]; submit_filename=spdiff_Final_rf_submit OOB_conf_mtrx=[YN=121, NY=38]=159; max.Accuracy.OOB=0.8203390 opt.prob.threshold.OOB=0.6 biddable=100.00; startprice.diff=57.53; idseq.my=41.31; prdline.my=11.43; cellular.fctr=2.36; prdline.my.clusterid=1.82;

    All.X.no.rnorm.rf: 
        fit_RMSE=92.19; OOB_RMSE=130.86; new_RMSE=130.86; 
        biddable=100.00; prdline.my.fctr=61.92; idseq.my=57.77;
            condition.fctr=29.53; storage.fctr=11.22; color.fctr=6.69;
            cellular.fctr=6.11
            
All.X.no.rnorm.rf: Leaderboard: 0.77443
    newobs_tbl=[N=606, Y=192]; submit_filename=spdiff_Final_rf_submit
    OOB_conf_mtrx=[YN=112, NY=28]=140; max.Accuracy.OOB=0.8418079
        opt.prob.threshold.OOB=0.6
        startprice.diff=100.00; biddable=96.53; idseq.my=38.10; 
            prdline.my=3.65; cellular.fctr=2.21; prdline.my.clusterid=0.91; 

[1] “D.T.condit” “D.T.use” “D.T.scratch” “D.T.new” “D.T.good” “D.T.screen” [7] “D.T.ipad” “D.T.great” “D.T.work” “D.T.excel”

color: All.Interact.X.glmnet: fit_RMSE=88.64520; prdline.my.fctr:D.TfIdf.sum.stem.stop.Ratio=100.00; prdline.my.fctr:condition.fctr=77.35 D.TfIdf.sum.stem.stop.Ratio=68.18 prdline.my.fctr:color.fctr=68.12 prdline.my.fctr:storage.fctr=63.32

All.X.no.rnorm.rf: Leaderboard: 0.80638
    newobs_tbl=[N=550, Y=248]; submit_filename=color_Final_rf_submit
    OOB_conf_mtrx=[YN=108, NY=54]=162; max.Accuracy.OOB=0.8169492
        opt.prob.threshold.OOB=0.5
        biddable=100.00; startprice.diff=77.90; idseq.my=48.49; 
            D.ratio.sum.TfIdf.nwrds=6.48; storage.fctr=4.74;
                D.TfIdf.sum.stem.stop.Ratio=4.57; prdline.my=4.32;

[1] “D.T.condit” “D.T.use” “D.T.scratch” “D.T.new” “D.T.good” “D.T.screen” [7] “D.T.ipad” “D.T.great” “D.T.work” “D.T.excel”

assctxt: select_terms: [1] “condit” “use” “scratch” “new” “good” “ipad” “screen” “great”
[9] “work” “excel” “like” “box” “function” “item” “fulli” “minor” [17] “cosmet” “crack” “mint” “wear”
assoc_terms: [1] “bare” “sign” “light” “back” “hous” “tab” “dent”
[8] “brand” “open” “mini” “appl” “air” “wifi” “affect”
[15] “protector” “shape” “perfect” “order” “button” “origin” “retail”
[22] “seal” “includ” “100” “may” “show” “overal” “bodi”
[29] “phone” “will” “damag” “near” “top” “normal” “tear”
[36] “expect” “minim”
`glb_allobs_df$prdline.my$.clusterid` Entropy: 0.6665 (97.3037 pct) All.Interact.X.glmnet: fit_RMSE=88.40723; prdline.my.fctr:D.TfIdf.sum.stem.stop.Ratio=100.00; prdline.my.fctriPadAir:D.npnct01.log=79.67748; D.TfIdf.sum.stem.stop.Ratio=79.08192; prdline.my.fctriPadmini 2+:condition.fctrNew other (see details)=78.24020; prdline.my.fctriPad 3+:color.fctrSpace Gray=77.05886; prdline.my.fctriPadmini 2+:storage.fctrUnknown=75.68145; prdline.my.fctrUnknown:.clusterid.fctr3=74.23727;

All.Interact.X.no.rnorm.rf: Leaderboard: 0.72974
    newobs_tbl=[N=682, Y=116]; submit_filename=assctxt_Final_rf_submit
    OOB_conf_mtrx=[YN=125, NY=43]=168; max.Accuracy.OOB=0.8101695; max.auc.OOB=???;
        opt.prob.threshold.OOB=0.6
        biddable=100.00; startprice.diff=51.04; idseq.my=29.51; 
            startprice.diff:biddable=28.70
            prdline.my.fctriPadmini:idseq.my=6.89
    Highest max.auc.OOB=???; for model:        

ctgry2: select_terms: 50 assoc_terms: 103 `glb_allobs_df$prdline.my$.clusterid` Entropy: 0.6559 (96.7556 pct) All.Interact.X.glmnet: next: All.X.glmnet fit_RMSE=88.80010; prdl.my.descr.fctr:storage.fctr 100.00 prdl.my.descr.fctr:condition.fctr 93.96 prdl.my.descr.fctr:D.npnct01.log 89.94 D.TfIdf.sum.stem.stop.Ratio 75.90 prdl.my.descr.fctr:color.fctr 72.43 prdl.my.descr.fctr:.clusterid.fctr7 63.97 prdl.my.descr.fctr:D.npnct08.log 63.46 prdl.my.descr.fctr 63.05 prdl.my.descr.fctr:D.TfIdf.sum.stem.stop.Ratio 62.91 prdl.my.descr.fctr:D.npnct16.log 62.39

Ensemble.glmnet: Leaderboard: 0.80480
    newobs_tbl=[N=473, Y=325]; submit_filename=ctgry2_Final_glmnet_submit
    OOB_conf_mtrx=[YN=79, NY=101]=180; 
        max.Accuracy.OOB=0.7977528; max.auc.OOB=0.8554068; opt.prob.threshold.OOB=0.4
    Highest max.auc.OOB=0.8587215; for model:All.X.no.rnorm.rf     
        biddable                        100.000
        startprice.diff                  71.793
        idseq.my                         43.511
        

ensemble: select_terms: 50 assoc_terms: 103 `glb_allobs_df$prdline.my$.clusterid` Entropy: 0.6570 (96.9282 pct) Final.glmnet: min.RMSE.fit=31.45801 Ensemble.glmnet: min.RMSE.fit=30.67172 startprice.predict.All.Interact.X.no.rnorm.rf 100.000 startprice.predict.All.X.no.rnorm.rf 75.381 All.X.glmnet: min.RMSE.fit=88.98066 prdl.my.descr.fctr 100.00 D.TfIdf.sum.stem.stop.Ratio 92.16 condition.fctr 79.01 prdl.my.descr.fctr:.clusterid.fctr5 69.91 D.npnct16.log 61.70 color.fctrWhite 59.42 D.npnct01.log 55.07 cellular.fctr1 53.35 D.terms.n.post.stop 52.92

Ensemble.glmnet: Leaderboard: 0.73183
    newobs_tbl=[N=557, Y=241]; submit_filename=ensemble_Final_glmnet_submit
    OOB_conf_mtrx=[YN=75, NY=60]=135; 
        max.Accuracy.OOB=0.8483146; max.auc.OOB=0.9187365; opt.prob.threshold.OOB=0.5

sold.fctr.predict.All.X.no.rnorm.rf.prob 100.000000
sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob 98.873608
Highest max.auc.OOB=0.9180131; for model:All.X.no.rnorm.rf
startprice.diff 100.000 biddable 95.318 idseq.my 33.365

Prediction Accuracy Enhancement Options:

  • import.data chunk:
    • which obs should be in fit vs. OOB (currently dirty.0 vs .1 is split 50%)
  • inspect.data chunk:
    • For date variables
      • Appropriate factors ?
      • Different / More last* features ?
  • scrub.data chunk:
  • transform.data chunk:
    • derive features from multiple features
  • manage.missing.data chunk:
    • Not fill missing vars
    • Fill missing numerics with a different algorithm
    • Fill missing chars with data based on clusters
  • extract.features chunk:
    • Text variables: move to date extraction chunk ???
      • Mine acronyms
      • Mine places
  • Review set_global_options chunk after features are finalized

[](.png)

Potential next steps include:

  • Organization:
    • Categorize by chunk
    • Priority criteria:
      1. Ease of change
      2. Impacts report
      3. Cleans innards
      4. Bug report
  • all chunks:
    • at chunk-end rm(!glb_)
  • manage.missing.data chunk:
    • cleaner way to manage re-splitting of training vs. new entity
  • extract.features chunk:
    • Add n-grams for glb_txt_vars
      • “RTextTools”, “tau”, “RWeka”, and “textcat” packages
    • Convert user-specified mutate code to config specs
  • fit.models chunk:
    • Prediction accuracy scatter graph:
    • Add tiles (raw vs. PCA)
    • Use shiny for drop-down of “important” features
    • Use plot.ly for interactive plots ?

    • Change .fit suffix of model metrics to .mdl if it’s data independent (e.g. AIC, Adj.R.Squared - is it truly data independent ?, etc.)
    • move model_type parameter to myfit_mdl before indep_vars_vctr (keep all model_* together)
    • create a custom model for rpart that has minbucket as a tuning parameter
    • varImp for randomForest crashes in caret version:6.0.41 -> submit bug report

  • Probability handling for multinomials vs. desired binomial outcome
  • ROCR currently supports only evaluation of binary classification tasks (version 1.0.7)
  • extensions toward multiclass classification are scheduled for the next release

  • Skip trControl.method=“cv” for dummy classifier ?
  • Add custom model to caret for a dummy (baseline) classifier (binomial & multinomial) that generates proba/outcomes which mimics the freq distribution of glb_rsp_var values; Right now glb_dmy_glm_mdl always generates most frequent outcome in training data
  • glm_dmy_mdl should use the same method as glm_sel_mdl until custom dummy classifier is implemented

  • fit.all.training chunk:
    • myplot_prediction_classification: displays ‘x’ instead of ‘+’ when there are no prediction errors
  • Compare glb_sel_mdl vs. glb_fin_mdl:
    • varImp
    • Prediction differences (should be minimal ?)
  • Move glb_analytics_diag_plots to mydsutils.R: (+) Easier to debug (-) Too many glb vars used
  • Add print(ggplot.petrinet(glb_analytics_pn) + coord_flip()) at the end of every major chunk
  • Parameterize glb_analytics_pn
  • Move glb_impute_missing_data to mydsutils.R: (-) Too many glb vars used; glb_<>_df reassigned
  • Replicate myfit_mdl_classification features in myfit_mdl_regression
  • Do non-glm methods handle interaction terms ?
  • f-score computation for classifiers should be summation across outcomes (not just the desired one ?)
  • Add accuracy computation to glb_dmy_mdl in predict.data.new chunk
  • Why does splitting fit.data.training.all chunk into separate chunks add an overhead of ~30 secs ? It’s not rbind b/c other chunks have lower elapsed time. Is it the number of plots ?
  • Incorporate code chunks in print_sessionInfo
  • Test against
    • projects in github.com/bdanalytics
    • lectures in jhu-datascience track

Analysis:

# Reset the workspace and pin the RNG seed before anything else runs.
rm(list = ls())
set.seed(12345)
options(stringsAsFactors = FALSE)

# Source the helper scripts one after another, in the order listed.
# source() evaluates each file in the global environment by default.
invisible(lapply(
    c("~/Dropbox/datascience/R/myscript.R",
      "~/Dropbox/datascience/R/mydsutils.R",
      "~/Dropbox/datascience/R/myplot.R",
      "~/Dropbox/datascience/R/mypetrinet.R",
      "~/Dropbox/datascience/R/myplclust.R"),
    source))
## Loading required package: caret
## Loading required package: lattice
## Loading required package: ggplot2

# Gather all package requirements here
suppressPackageStartupMessages(require(doMC))
# Register 4 parallel workers; sizing rule of thumb from the original author:
#   max(length(glb_txt_vars), glb_n_cv_folds) + 1
registerDoMC(4)
#packageVersion("tm")
#require(sos); findFn("cosine", maxPages=2, sortby="MaxScore")

# ---- Analysis control global variables ----

# Locations of the training and the new (test) observations
glb_trnng_url <- "https://inclass.kaggle.com/c/15-071x-the-analytics-edge-summer-2015/download/eBayiPadTrain.csv"
glb_newdt_url <- "https://inclass.kaggle.com/c/15-071x-the-analytics-edge-summer-2015/download/eBayiPadTest.csv"

# Prefix for this run's outputs (e.g. the submit_filename values in the notes)
glb_out_pfx <- "ensemble_"

# TRUE or FALSE
glb_save_envir <- FALSE

# TRUE or FALSE
glb_is_separate_newobs_dataset <- TRUE
# TRUE or FALSE
glb_split_entity_newobs_datasets <- TRUE
# one of "condition", "sample", "copy"
glb_split_newdata_method <- "sample"
# NULL, or "is.na(<var>)", or "<var> <condition_operator> <value>"
glb_split_newdata_condition <- NULL
# must be > 0 & < 1
glb_split_newdata_size_ratio <- 0.3
# any integer
glb_split_sample.seed <- 123

# NULL, or any integer
glb_max_fitobs <- NULL

# Problem type: classification is defined as the complement of regression
glb_is_regression <- FALSE
glb_is_classification <- !glb_is_regression
# TRUE or FALSE
glb_is_binomial <- TRUE

# Name of the response column as it appears in the raw data
glb_rsp_var_raw <- "sold"

# for classification, the response variable has to be a factor
# (set this to glb_rsp_var_raw when no mapping is needed)
glb_rsp_var <- "sold.fctr"

# if the response factor is based on numbers/logicals e.g (0/1 OR TRUE/FALSE vs. "A"/"B"), 
#   or contains spaces (e.g. "Not in Labor Force")
#   caret predict(..., type="prob") crashes
glb_map_rsp_raw_to_var <- function(raw) {
    # Map the raw response onto a two-level factor with levels c("N", "Y").
    #
    # Args:
    #   raw: vector of raw response values; elements equal to 1 become "Y",
    #        every other non-missing value becomes "N", NA stays NA.
    # Returns:
    #   A factor the same length as `raw` whose levels are always
    #   c("N", "Y"), with "N" as the reference (first) level.
    #
    # The levels are fixed explicitly instead of relevel(as.factor(...), ref="N"):
    # that form errors when no "N" is present (all 1s, all NA, or empty input).
    # Fixed levels also keep the integer codes stable, which the inverse
    # mapping (as.numeric(var) - 1) relies on.
    #
    # Alternative mappings kept from the template:
    #     return(log(raw))
    #     #as.factor(paste0("B", raw))
    #     #as.factor(gsub(" ", "\\.", raw))
    ret_vals <- rep_len(NA_character_, length(raw))
    is_known <- !is.na(raw)
    ret_vals[is_known] <- ifelse(raw[is_known] == 1, "Y", "N")
    return(factor(ret_vals, levels=c("N", "Y")))
}
glb_map_rsp_raw_to_var(c(1, 1, 0, 0, NA))
## [1] Y    Y    N    N    <NA>
## Levels: N Y
glb_map_rsp_var_to_raw <- function(var) {
    # Convert the response factor back to a number: the integer level code
    # minus one, so the first level maps to 0 and the second to 1.
    # NA entries stay NA.
    #
    # Alternative inverse mappings kept from the template:
    #     return(exp(var))
    #     #as.numeric(var)
    #     #gsub("\\.", " ", levels(var)[as.numeric(var)])
    #     c("<=50K", " >50K")[as.numeric(var)]
    #     #c(FALSE, TRUE)[as.numeric(var)]
    level_codes <- as.numeric(var)
    level_codes - 1
}
# Round-trip demo: raw -> factor -> raw should reproduce the input
glb_map_rsp_var_to_raw(glb_map_rsp_raw_to_var(c(1, 1, 0, 0, NA)))
## [1]  1  1  0  0 NA

# A mapping function is required only when the modelled response differs from
# the raw one. `&&` (scalar, short-circuit) is used so the is.null() test is
# not evaluated when the two names are the same.
if ((glb_rsp_var != glb_rsp_var_raw) && is.null(glb_map_rsp_raw_to_var))
    stop("glb_map_rsp_raw_to_var function expected")

# Prefix for prediction column names
glb_rsp_var_out <- paste0(glb_rsp_var, ".predict.") # model_id is appended later

# List info gathered for various columns
# <col_name>:   <description>; <notes>
# description = The text description of the product provided by the seller.
# biddable = Whether this is an auction (biddable=1) or a sale with a fixed price (biddable=0).
# startprice = The start price (in US Dollars) for the auction (if biddable=1) or the sale price (if biddable=0).
# condition = The condition of the product (new, used, etc.)
# cellular = Whether the iPad has cellular connectivity (cellular=1) or not (cellular=0).
# carrier = The cellular carrier for which the iPad is equipped (if cellular=1); listed as "None" if cellular=0.
# color = The color of the iPad.
# storage = The iPad's storage capacity (in gigabytes).
# productline = The name of the product being sold.

# If multiple vars are parts of id, consider concatenating them to create one id var
# If glb_id_var == NULL, ".rownames <- row.names()" is the default
# Derive a numeric feature from id var
# Identifier column, category column and columns to drop
glb_id_var       <- "UniqueID"
glb_category_var <- "prdline.my"
glb_drop_vars    <- NULL # or c("<col_name>")

# Variable mapping configuration (none configured)
glb_map_vars <- NULL # or c("<var1>", "<var2>")
glb_map_urls <- list()
# glb_map_urls[["<var1>"]] <- "<var1.url>"

# Value re-assignment pairs (none configured); the variable names are taken
# from the list's names, so they are NULL while the list is NULL
glb_assign_pairs_lst <- NULL
# glb_assign_pairs_lst[["<var1>"]] <- list(from=c(NA),
#                                            to=c("NA.my"))
glb_assign_vars <- names(glb_assign_pairs_lst)

# Derived features
# Derived features.
# Each entry is named after the feature it creates and holds:
#   mapfn - function computing the derived values
#   args  - names of the source column(s) handed to mapfn
#           (presumably passed via do.call by the pipeline - see the
#            commented-out debugging snippet further down; TODO confirm)
glb_derive_lst <- NULL;

# Add logs of numerics that are not distributed normally ->  do automatically ???

# Numeric sequence derived from the id: UniqueID shifted down by 10000
glb_derive_lst[["idseq.my"]] <- list(
    mapfn=function(UniqueID) { return(UniqueID - 10000) }
    , args=c("UniqueID"))

# Working copy of productline under the project's ".my" naming
glb_derive_lst[["prdline.my"]] <- list(
    mapfn=function(productline) { return(productline) }
    , args=c("productline"))

# Natural log of the start price
# NOTE(review): log(0) is -Inf - confirm startprice is always > 0
glb_derive_lst[["startprice.log"]] <- list(
    mapfn=function(startprice) { return(log(startprice)) }
    , args=c("startprice"))
# glb_derive_lst[["startprice.log.zval"]] <- list(

# Cleaned-up copy of the seller's free-text description.
# The substitutions are applied sequentially, so later patterns see the output
# of earlier ones; they split run-together tokens, correct misspellings and
# collapse word variants. All but three of them pass ignore.case=TRUE.
glb_derive_lst[["descr.my"]] <- list(
    mapfn=function(description) { mod_raw <- description;
        # Modifications for this exercise only
        # Add dictionary to stemDocument e.g. stickers stemmed to sticker ???
        mod_raw <- gsub("\\.\\.", "\\. ", mod_raw);
        # Insert a space after *, comma, - or / when wedged between word chars
        mod_raw <- gsub("(\\w)(\\*|,|-|/)(\\w)", "\\1\\2 \\3", mod_raw);

        mod_raw <- gsub("8\\.25", "825", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" 10\\.SCREEN ", " 10\\. SCREEN ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" 128 gb ", " 128gb ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" 16G, ", " 16GB, ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" actuuly ", " actual ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" Apple care ", " Applecare ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" ans ", " and ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" bacK!wiped ", " bacK ! wiped ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" backplate", " back plate", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("\\bbarley", "barely", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" bend ", " bent ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("Best Buy", "BestBuy", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" black\\.Device ", " black \\. Device ", mod_raw,
                        ignore.case=TRUE);
        mod_raw <- gsub("black\\),charger ", "black\\), charger ", mod_raw,
                        ignore.case=TRUE);
        mod_raw <- gsub(" blocks", " blocked", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" brokenCharger ", " broken Charger ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" carefully ", " careful ", mod_raw, ignore.case=TRUE);

        mod_raw <- gsub(" (conditon|condtion|contidion|conditions)", " condition", mod_raw,
                        ignore.case=TRUE);
        mod_raw <- gsub("(CONDITION|ONLY)\\.(\\w)", "\\1\\. \\2", mod_raw,
                        ignore.case=TRUE);
        mod_raw <- gsub("(condition)(Has)", "\\1\\. \\2", mod_raw);

        mod_raw <- gsub(" consist ", " consistent ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" cracksNo ", " cracks No ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" DEFAULTING ", " DEFAULT ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" definitely ", " definite ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" described", " describe", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" desciption", " description", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" devices", " device", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" Digi\\.", " Digitizer\\.", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" display\\.New ", " display\\. New ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" displays", " display", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" drop ", " dropped ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" effect ", " affect ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" Excellant ", " Excellent ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" excellently", " excellent", mod_raw, ignore.case=TRUE);
        # The replacement keeps the trailing space that the pattern consumes;
        # without it the expansion fuses with the following word
        # ("... conditionwith box").
        mod_raw <- gsub(" EUC ", " excellent used condition ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" feels ", " feel ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" fineiCloud ", " fine iCloud ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" functioanlity", " functionality", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("^Gentle ", "Gently ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("\\(gray color", "\\(spacegray color", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" GREAT\\.SCreen ", " GREAT\\. SCreen ", mod_raw,
                        ignore.case=TRUE);
        mod_raw <- gsub(" Framing ", " Frame ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("iCL0UD", "iCLOUD", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("^iPad Black 3rd generation ", "iPad 3 Black ", mod_raw,
                        ignore.case=TRUE);
        mod_raw <- gsub(" IMEINo ", " IMEI No ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" install\\. ", " installed\\. ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("inivisible", "invisible", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" manuals ", " manual ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" book ", " manual ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" mars ", " marks ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" marks\\.Absolutely ", " marks\\. Absolutely ", mod_raw,
                        ignore.case=TRUE);
        mod_raw <- gsub(" minimum", " minimal", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" MINT\\.wiped ", " MINT\\. wiped ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" NEW\\!(SCREEN|ONE) ", " NEW\\! \\1 ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" new looking$", " looks new", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" newer ", " new ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" oped ", " opened ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" opening", " opened", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" operated", " operational", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" perfectlycord ", " perfectly cord ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" performance", " performs", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" personalized ", " personal ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" products ", " product ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" Keeped ", " Kept ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" knicks ", " nicks ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("^READiPad ", "READ iPad ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" re- assemble ", " reassemble ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" REFURB\\.", "  REFURBISHED\\.", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" reponding", " respond", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" rotation ", " rotate ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" Sales ", " Sale ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" scratchs ", " scratches ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" SCREEB ", " SCREEN ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" shipped| Shipment", " ship", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("shrink wrap", "shrinkwrap", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" sides ", " side ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" skinned,", " skin,", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("\\bspace (grey|gray)", "spacegray", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" spec ", " speck ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("^somescratches ", "some scratches ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" Sticker ", " Stickers ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("SWAPPA\\.COM", "SWAPPACOM", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" T- Mobile", "  TMobile", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" touchscreen ", " touch screen ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" UnlockedCracked ", " Unlocked Cracked ", mod_raw,
                        ignore.case=TRUE);
        mod_raw <- gsub(" uppser ", " upper ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" use\\.Scratches ", " use\\. Scratches ", mod_raw,
                        ignore.case=TRUE);
        mod_raw <- gsub(" verify ", " verified ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" wear\\.Device ", " wear\\. Device ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" whats ", " what's ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" WiFi\\+4G ", " WiFi \\+ 4G ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" Zaag Invisible Shield", " Zaag InvisibleShield", mod_raw,
                        ignore.case=TRUE);
                                    return(mod_raw) }
    , args=c("description"))

#     mapfn=function(startprice) { return(scale(log(startprice))) }    
#     , args=c("startprice"))
#     mapfn=function(Rasmussen) { return(ifelse(sign(Rasmussen) >= 0, 1, 0)) }
#     mapfn=function(PropR) { return(as.factor(ifelse(PropR >= 0.5, "Y", "N"))) }
#     mapfn=function(purpose) { return(relevel(as.factor(purpose), ref="all_other")) }
#     mapfn=function(Week) { return(substr(Week, 1, 10)) }
#     mapfn=function(raw) { tfr_raw <- as.character(cut(raw, 5)); 
#                           tfr_raw[is.na(tfr_raw)] <- "NA.my";
#                           return(as.factor(tfr_raw)) }
#     , args=c("raw"))
#     mapfn=function(PTS, oppPTS) { return(PTS - oppPTS) }
#     , args=c("PTS", "oppPTS"))

# # If glb_allobs_df is not sorted in the desired manner
#     mapfn=function(Week) { return(coredata(lag(zoo(orderBy(~Week, glb_allobs_df)$ILI), -2, na.pad=TRUE))) }
#     mapfn=function(ILI) { return(coredata(lag(zoo(ILI), -2, na.pad=TRUE))) }
#     mapfn=function(ILI.2.lag) { return(log(ILI.2.lag)) }

# glb_derive_lst[["<txt_var>.niso8859.log"]] <- list(
#     mapfn=function(<txt_var>) { match_lst <- gregexpr("&#[[:digit:]]{3};", <txt_var>)
#                         match_num_vctr <- unlist(lapply(match_lst, 
#                                                         function(elem) length(elem)))
#                         return(log(1 + match_num_vctr)) }
#     , args=c("<txt_var>"))

#     mapfn=function(raw) { mod_raw <- raw;
#         mod_raw <- gsub("&#[[:digit:]]{3};", " ", mod_raw);
#         # Modifications for this exercise only
#         mod_raw <- gsub("\\bgoodIn ", "good In", mod_raw);
#                           return(mod_raw)

#         # Create user-specified pattern vectors 
# #sum(mycount_pattern_occ("Metropolitan Diary:", glb_allobs_df$Abstract) > 0)
#         if (txt_var %in% c("Snippet", "Abstract")) {
#             txt_X_df[, paste0(txt_var_pfx, ".P.metropolitan.diary.colon")] <-
#                 as.integer(0 + mycount_pattern_occ("Metropolitan Diary:", 
#                                                    glb_allobs_df[, txt_var]))
#summary(glb_allobs_df[ ,grep("P.on.this.day", names(glb_allobs_df), value=TRUE)])

# glb_derive_lst[["<var1>"]] <- glb_derive_lst[["<var2>"]]

# Names of the derived features, i.e. the names of the glb_derive_lst entries
glb_derive_vars <- names(glb_derive_lst)
# tst <- "descr.my"; args_lst <- NULL; for (arg in glb_derive_lst[[tst]]$args) args_lst[[arg]] <- glb_allobs_df[, arg]; print(head(args_lst[[arg]])); print(head(drv_vals <- do.call(glb_derive_lst[[tst]]$mapfn, args_lst))); 
# print(which_ix <- which(args_lst[[arg]] == 0.75)); print(drv_vals[which_ix]); 

glb_date_vars <- NULL # or c("<date_var>")
glb_date_fmts <- list(); #glb_date_fmts[["<date_var>"]] <- "%m/%e/%y"
glb_date_tzs <- list();  #glb_date_tzs[["<date_var>"]] <- "America/New_York"
#grep("America/New", OlsonNames(), value=TRUE)

glb_txt_vars <- c("descr.my")   
Sys.setlocale("LC_ALL", "C") # For english
## [1] "C/C/C/C/C/en_US.UTF-8"
# Filename prefix for text-munging artifacts
# (NOTE(review): how the prefix is consumed is not visible in this chunk)
glb_txt_munge_filenames_pfx <- "ebay_mytxt_"

# Extra stop words, keyed by text variable name
glb_append_stop_words <- list()
# Remember to use unstemmed words
#orderBy(~ -cor.y.abs, subset(glb_feats_df, grepl("[HSA]\\.T\\.", id) & !is.na(cor.high.X)))
# Every candidate term below is commented out, so the right-hand side evaluates
# to c(NULL), i.e. NULL. Assigning NULL with `[[<-` does not create a list
# element, so glb_append_stop_words stays an empty list until at least one
# term is uncommented.
glb_append_stop_words[["descr.my"]] <- c(NULL
                                        # freq = 1 
#                                         ,"511","825","975"
#                                         ,"2nd"
#                                         ,"a1314","a1430","a1432"
#     ,"abused","across","adaptor","add","advised","antenna","anti","anyone","anything"
#         ,"applied","applying","area","arizona","att","attached"
#     ,"backlight","backlit","beetle","beginning","besides","bidder","binder"
#         ,"bonus","boot","bound","brick","broke","bruises","buyers"
#     ,"capacity","causing","changed","changing","cherished","chrome","classes","closely"
#         ,"confidence","considerable","consumer","contents","control","cream","cuts"
#     ,"daily","date","daughter"
#         ,"deactivated","decent","deep","defender","defense","degree"
#             ,"demonstration","depicted","depress"
#         ,"difficulty","disclaimer","discoloration","distressed","divider"
#         ,"dlxnqat9g5wt","dock","documents","done","dont","durable","dust","duty"
#     ,"either","emblem","erased","ereader","esi","essentially","etch","etched"
#         ,"every","exact","exhibition","expires"
#     ,"facing","faded","faint","february","film","final","five"
#         ,"flickers","folding","forgot","forwarders","freezes","freight"
#                             ,"games","generic","genuine","glitter","goes","grey","guide"
#             ,"half","hairline","handstand","hdmi","high","higher","hold","hole","hospital"
#         ,"imie","immaculate","impact"
#             ,"instead","intended"
#                 ,"interest","interior","international","internationally","intro"
#                                         ,"jack","july"
#                                         ,"keeps","keyword","kids","kind","known"
#     ,"largest","last","late","length","let","letters","level"
#         ,"lifting","limited","line","lining","liquid","liquidation","literally","literature"
#         ,"local","logic","long","longer","looping","loose","loss","lost"
#                     ,"mb292ll","mc707ll","mc916ll","mc991ll","md789ll","mf432ll","mgye2ll"
#                     ,"mic","middle", "mind","mixed","mostly"
#                                         ,"neither","none","november"
#                                         ,"occasional","oem","often","online","outside"
#     ,"padfolio","pairing","paperwork","past"
#         ,"period","pet","photograph","piece","played","plug"
#         ,"poor","portfolio","portion","pouch"
#         ,"preinstalled","pressure","price","proof","provided"
#     ,"ranging","rather"
#         ,"real","realized","reassemble","reboot","receipt","recently","red"
#             ,"reflected","refunds","remote","repeat"
#             ,"required","reserve","residue","restarts","result","reviewed"
#         ,"ringer","roughly","rubber","running"
#     ,"said","school"
#         ,"seamlessly","seconds","seem","seen","semi","send","september","serious","setup"
#         ,"shell","short","showroom"
#         ,"sighs","site","size","sleeve","slice","smoke","smooth","smudge"
#         ,"softer","software","somewhat","soon"
#         ,"space","sparingly","sparkiling","special","speed","speigen"
#         ,"stains","standup","start","status","stopped","strictly"
#         ,"subtle","sustained","swappacom","swiped","swivel"
#     ,"take","technical","tempered","texture","thank","therefore","think","though"
#         ,"toddler","totally","touchy","toys","tried","typical"
#                                         ,"university","unknown","untouched","upgrade"
#                                         ,"valid","vary","version","virtually"
#                                 ,"want","wavy","website","whole","winning","worn","wrapped"
#                                         ,"zaag","zero", "zombie","zoogue"
                                            )
#subset(glb_allobs_df, S.T.newyorktim > 0)[, c("UniqueID", "Snippet", "S.T.newyorktim")]
#glb_txt_lst[["Snippet"]][which(glb_allobs_df$UniqueID %in% c(8394, 8317, 8339, 8350, 8307))]

# Terms that are always kept, keyed by text variable (none configured).
# Remember to use stemmed terms
glb_important_terms <- list()

# Variable against which text terms are correlated
glb_txt_cor_var <- glb_rsp_var # or "feat"
# Term filter; select one from c("top.cor", "top.val", "sparse")
glb_txt_filter_terms <- "top.val"
# Number of top terms retained, named by text variable
glb_txt_top_n <- setNames(50, glb_txt_vars)

# Sparsity thresholds, named by text variable
# Properties:
#   numrows(glb_feats_df) << numrows(glb_fitobs_df)
#   Select terms that appear in at least 0.2 * O(FP/FN(glb_OOBobs_df))
#       numrows(glb_OOBobs_df) = 1.1 * numrows(glb_newobs_df)
glb_sprs_thresholds <- setNames(0.950, glb_txt_vars) # 0.950 generates 8 terms

# User-specified exclusions (not currently excluded: "startprice.log", "sold")
glb_exclude_vars_as_features <- c(
    "productline", "description", "startprice",
    "prdline.my", "prdline.my.fctr"
)
# The raw response is excluded whenever it differs from the modelled response
if (glb_rsp_var_raw != glb_rsp_var) {
    glb_exclude_vars_as_features <-
        union(glb_exclude_vars_as_features, glb_rsp_var_raw)
}

# Features that should be excluded due to known causation by the prediction
# variable; replace NULL with c("<col_name>") to add some
glb_exclude_vars_as_features <- union(glb_exclude_vars_as_features, NULL)

# Missing-data imputation
glb_impute_na_data <- FALSE     # or TRUE
glb_mice_complete.seed <- 144   # or any integer

# Clustering
glb_cluster <- TRUE
glb_cluster.seed <- 189                 # or any integer
glb_cluster_entropy_var <- glb_rsp_var  # or "<feat>"

glb_interaction_only_features <- NULL # or ???

# Containers for fitted models and their summary statistics
glb_models_lst <- list()
glb_models_df <- data.frame()

# Modelling methods to try, chosen by problem type
glb_models_method_vctr <-
    if (glb_is_regression) {
        # Regression
        c("lm", "glm", "bayesglm", "glmnet", "rpart", "rf")
    } else if (glb_is_binomial) {
        # Binomial classification
        c("glm", "bayesglm", "glmnet", "rpart", "rf")
    } else {
        # Any other classification
        c("rpart", "rf")
    }

# Baseline prediction model feature(s)
glb_Baseline_mdl_var <- NULL # or c("<col_name>")

# Custom model metric: all four settings are left NULL here.
# Template for a cost-matrix based metric (uncomment and adapt):
# glb_model_metric_terms <- matrix(c(
#                               0,1,2,3,4,
#                               2,0,1,2,3,
#                               4,2,0,1,2,
#                               6,4,2,0,1,
#                               8,6,4,2,0
#                           ), byrow=TRUE, nrow=5)
# glb_model_metric <- "<metric_name>"
# glb_model_metric_maximize <- FALSE
#     (TRUE is not the default for both classification & regression)
# glb_model_metric_smmry <- function(data, lev=NULL, model=NULL) {
#     confusion_mtrx <- t(as.matrix(confusionMatrix(data$pred, data$obs)))
#     #print(confusion_mtrx)
#     #print(confusion_mtrx * glb_model_metric_terms)
#     metric <- sum(confusion_mtrx * glb_model_metric_terms) / nrow(data)
#     names(metric) <- glb_model_metric
#     return(metric)
# }
glb_model_metric_terms    <- NULL
glb_model_metric          <- NULL
glb_model_metric_maximize <- NULL
glb_model_metric_smmry    <- NULL

# Tuning grid: one row per parameter, with the min / max / by of its range.
# Rows tried earlier and currently disabled:
#   data.frame(parameter="cp", min=0.00005, max=0.00005, by=0.000005)
#       #seq(from=0.01,  to=0.01, by=0.01)
#   data.frame(parameter="mtry",  min=080, max=100, by=10)
#   data.frame(parameter="mtry",  min=08, max=10, by=1)
glb_tune_models_df <- rbind(
    data.frame(parameter="dummy", min=2, max=4, by=1)
) # or NULL

# Presumably the number of cross-validation folds -- confirm where it is consumed
glb_n_cv_folds <- 3 # or NULL

# Classification probability threshold; NULL leaves it unset here
glb_clf_proba_threshold <- NULL # 0.5

# Model selection criteria, in the order listed
if (glb_is_regression) {
    glb_model_evl_criteria <- c("min.RMSE.OOB", "max.R.sq.OOB", "max.Adj.R.sq.fit")
    # fit-based alternative:
    #glb_model_evl_criteria <- c("min.RMSE.fit", "max.R.sq.fit", "max.Adj.R.sq.fit")
}
if (glb_is_classification) {
    glb_model_evl_criteria <-
        if (glb_is_binomial) {
            c("max.Accuracy.OOB", "max.auc.OOB", "max.Kappa.OOB", "min.aic.fit")
        } else {
            c("max.Accuracy.OOB", "max.Kappa.OOB")
        }
}

# Selected model id; select from c(NULL, "Ensemble.glmnet", "Low.cor.X.glm")
glb_sel_mdl_id <- "Ensemble.glmnet"
# Final model id; select from c(NULL, glb_sel_mdl_id, "Final")
glb_fin_mdl_id <- NULL

# Columns shown when displaying observations
glb_dsp_cols <- c(
    "sold", ".grpid", "color", "condition", "cellular", "carrier", "storage"
)

# Depict process: a Petri net with 6 transitions, 4 places and 12 arcs.
# The i-th entries of `begin` and `end` together define the i-th arc.
glb_analytics_pn <- petrinet(
    name="glb_analytics_pn",
    trans_df=data.frame(
        id=1:6,
        name=c("data.training.all",
               "data.new",
               "model.selected",
               "model.final",
               "data.training.all.prediction",
               "data.new.prediction"),
        x=c(-5, -5, -15, -25, -25, -35),
        y=c(-5,  5,   0,   0,  -5,   5)
    ),
    places_df=data.frame(
        id=1:4,
        name=c("bgn", "fit.data.training.all", "predict.data.new", "end"),
        x=c(-0, -20, -30, -40),
        y=c( 0,   0,   0,   0),
        M0=c(3,   0,   0,   0)
    ),
    arcs_df=data.frame(
        begin=c("bgn",
                "bgn",
                "bgn",
                "data.training.all",
                "model.selected",
                "fit.data.training.all",
                "fit.data.training.all",
                "model.final",
                "data.new",
                "predict.data.new",
                "data.training.all.prediction",
                "data.new.prediction"),
        end=c("data.training.all",
              "data.new",
              "model.selected",
              "fit.data.training.all",
              "fit.data.training.all",
              "model.final",
              "data.training.all.prediction",
              "predict.data.new",
              "predict.data.new",
              "data.new.prediction",
              "end",
              "end")
    )
)
# Unflipped alternative:
#print(ggplot.petrinet(glb_analytics_pn))
print(ggplot.petrinet(glb_analytics_pn) + coord_flip())
## Loading required package: grid

# Starts out NULL (NOTE(review): presumably filled in by later chunks -- confirm)
glb_analytics_avl_objs <- NULL

# Start the chunk log with the "import.data" step. myadd_chunk is defined
# elsewhere; per the echoed result it returns one row per chunk with columns
# label, step_major, step_minor, bgn, end and elapsed.
glb_chunks_df <- myadd_chunk(NULL, "import.data")
##         label step_major step_minor   bgn end elapsed
## 1 import.data          1          0 8.653  NA      NA

Step 1.0: import data

chunk option: eval=

#glb_chunks_df <- myadd_chunk(NULL, "import.data")

# Import the training observations from glb_trnng_url; the imported data.frame
# carries the comment attribute "glb_trnobs_df" (see the str() output below)
glb_trnobs_df <- myimport_data(
    url=glb_trnng_url,
    comment="glb_trnobs_df",
    force_header=TRUE
)
## [1] "Reading file ./data/eBayiPadTrain.csv..."
## [1] "dimensions of data in ./data/eBayiPadTrain.csv: 1,861 rows x 11 cols"
##                                                                                            description
## 1                                                        iPad is in 8.5+ out of 10 cosmetic condition!
## 2 Previously used, please read description. May show signs of use such as scratches to the screen and 
## 3                                                                                                     
## 4                                                                                                     
## 5 Please feel free to buy. All products have been thoroughly inspected, cleaned and tested to be 100% 
## 6                                                                                                     
##   biddable startprice               condition cellular carrier      color
## 1        0     159.99                    Used        0    None      Black
## 2        1       0.99                    Used        1 Verizon    Unknown
## 3        0     199.99                    Used        0    None      White
## 4        0     235.00 New other (see details)        0    None    Unknown
## 5        0     199.99      Seller refurbished  Unknown Unknown    Unknown
## 6        1     175.00                    Used        1    AT&T Space Gray
##   storage productline sold UniqueID
## 1      16      iPad 2    0    10001
## 2      16      iPad 2    1    10002
## 3      16      iPad 4    1    10003
## 4      16 iPad mini 2    0    10004
## 5 Unknown     Unknown    0    10005
## 6      32 iPad mini 2    1    10006
##                                                                                                        description
## 65                                                                                                                
## 283                                                              Pristine condition, comes with a case and stylus.
## 948  \211\333\317Used Apple Ipad 16 gig 1st generation in Great working condition and 100% functional.Very little 
## 1354                                                                                                              
## 1366         Item still in complete working order, minor scratches, normal wear and tear but no damage. screen is 
## 1840                                                                                                              
##      biddable startprice          condition cellular carrier      color
## 65          0     195.00               Used        0    None    Unknown
## 283         1      20.00               Used        0    None    Unknown
## 948         0     110.00 Seller refurbished        0    None      Black
## 1354        0     300.00               Used        0    None      White
## 1366        1     125.00               Used  Unknown Unknown    Unknown
## 1840        0     249.99               Used        1  Sprint Space Gray
##      storage productline sold UniqueID
## 65        16   iPad mini    0    10065
## 283       64      iPad 1    0    10283
## 948       32      iPad 1    0    10948
## 1354      16    iPad Air    1    11354
## 1366 Unknown      iPad 1    1    11366
## 1840      16    iPad Air    1    11840
##                                                                                            description
## 1856  Overall item is in good condition and is fully operational and ready to use. Comes with box and 
## 1857 Used. Tested. Guaranteed to work. Physical condition grade B+ does have some light scratches and 
## 1858     This item is brand new and was never used; however, the box and/or packaging has been opened.
## 1859                                                                                                  
## 1860     This unit has minor scratches on case and several small scratches on the display. \nIt is in 
## 1861  30 Day Warranty.  Fully functional engraved iPad 1st Generation with signs of normal wear which 
##      biddable startprice               condition cellular carrier
## 1856        0      89.50                    Used        1    AT&T
## 1857        0     239.95                    Used        0    None
## 1858        0     329.99 New other (see details)        0    None
## 1859        0     400.00                     New        0    None
## 1860        0      89.00      Seller refurbished        0    None
## 1861        0     119.99                    Used        1    AT&T
##           color storage productline sold UniqueID
## 1856    Unknown      16      iPad 1    0    11856
## 1857      Black      32      iPad 4    1    11857
## 1858 Space Gray      16    iPad Air    0    11858
## 1859       Gold      16 iPad mini 3    0    11859
## 1860      Black      64      iPad 1    1    11860
## 1861      Black      64      iPad 1    0    11861
## 'data.frame':    1861 obs. of  11 variables:
##  $ description: chr  "iPad is in 8.5+ out of 10 cosmetic condition!" "Previously used, please read description. May show signs of use such as scratches to the screen and " "" "" ...
##  $ biddable   : int  0 1 0 0 0 1 1 0 1 1 ...
##  $ startprice : num  159.99 0.99 199.99 235 199.99 ...
##  $ condition  : chr  "Used" "Used" "Used" "New other (see details)" ...
##  $ cellular   : chr  "0" "1" "0" "0" ...
##  $ carrier    : chr  "None" "Verizon" "None" "None" ...
##  $ color      : chr  "Black" "Unknown" "White" "Unknown" ...
##  $ storage    : chr  "16" "16" "16" "16" ...
##  $ productline: chr  "iPad 2" "iPad 2" "iPad 4" "iPad mini 2" ...
##  $ sold       : int  0 1 1 0 0 1 1 0 1 1 ...
##  $ UniqueID   : int  10001 10002 10003 10004 10005 10006 10007 10008 10009 10010 ...
##  - attr(*, "comment")= chr "glb_trnobs_df"
## NULL
# glb_trnobs_df <- read.delim("data/hygiene.txt", header=TRUE, fill=TRUE, sep="\t",
#                             fileEncoding='iso-8859-1')
# glb_trnobs_df <- read.table("data/hygiene.dat.labels", col.names=c("dirty"),
#                             na.strings="[none]")
# glb_trnobs_df$review <- readLines("data/hygiene.dat", n =-1)
# comment(glb_trnobs_df) <- "glb_trnobs_df"                                

# glb_trnobs_df <- data.frame()
# for (symbol in c("Boeing", "CocaCola", "GE", "IBM", "ProcterGamble")) {
#     sym_trnobs_df <- 
#         myimport_data(url=gsub("IBM", symbol, glb_trnng_url), comment="glb_trnobs_df", 
#                                     force_header=TRUE)
#     sym_trnobs_df$Symbol <- symbol
#     glb_trnobs_df <- myrbind_df(glb_trnobs_df, sym_trnobs_df)
# }
                                
# glb_trnobs_df <- 
#     glb_trnobs_df %>% dplyr::filter(Year >= 1999)
                                
# Build glb_newobs_df (the observations to predict) and glb_allobs_df.
#   * glb_is_separate_newobs_dataset is TRUE: the new observations are imported
#     from glb_newdt_url and row-bound to the training observations.
#   * otherwise: glb_allobs_df is a copy of the imported training data and the
#     new observations are carved out of glb_trnobs_df according to
#     glb_split_newdata_method, one of "condition", "sample" or "copy".
# Stops with an error when the requested split is not implemented, when the
# split method is unknown, or when caTools is needed but not installed.
if (glb_is_separate_newobs_dataset) {
    glb_newobs_df <- myimport_data(url=glb_newdt_url, comment="glb_newobs_df", 
                                   force_header=TRUE)
    
    # To make plots / stats / checks easier in chunk:inspectORexplore.data
    glb_allobs_df <- myrbind_df(glb_trnobs_df, glb_newobs_df)
    comment(glb_allobs_df) <- "glb_allobs_df"
} else {
    glb_allobs_df <- glb_trnobs_df
    comment(glb_allobs_df) <- "glb_allobs_df"

    if (!glb_split_entity_newobs_datasets) {
        # The statements that used to follow this stop() were unreachable and
        # have been removed. They sketched drawing a small random sample of
        # glb_trnobs_df as glb_newobs_df; an implementation should use an
        # integer size and seq_len(), e.g.
        #   sample(seq_len(nrow(glb_trnobs_df)), max(2, nrow(glb_trnobs_df) %/% 1000))
        stop("Not implemented yet")
    } else if (glb_split_newdata_method == "condition") {
        # Rows satisfying the condition become the new observations; the
        # remaining rows stay as training observations
        glb_newobs_df <- do.call("subset", 
            list(glb_trnobs_df, parse(text=glb_split_newdata_condition)))
        glb_trnobs_df <- do.call("subset", 
            list(glb_trnobs_df, parse(text=paste0("!(", 
                                                  glb_split_newdata_condition,
                                                  ")"))))
    } else if (glb_split_newdata_method == "sample") {
        # require() only returns FALSE when the package is missing, so fail
        # here rather than at the sample.split() call below
        if (!require(caTools))
            stop("package 'caTools' is required when glb_split_newdata_method == 'sample'")

        set.seed(glb_split_sample.seed)
        # TRUE marks rows kept for training
        split <- sample.split(glb_trnobs_df[, glb_rsp_var_raw], 
                              SplitRatio=(1-glb_split_newdata_size_ratio))
        glb_newobs_df <- glb_trnobs_df[!split, ] 
        glb_trnobs_df <- glb_trnobs_df[split ,]
    } else if (glb_split_newdata_method == "copy") {  
        # Training and new observations are both the full data set
        glb_trnobs_df <- glb_allobs_df
        comment(glb_trnobs_df) <- "glb_trnobs_df"
        glb_newobs_df <- glb_allobs_df
        comment(glb_newobs_df) <- "glb_newobs_df"
    } else stop("glb_split_newdata_method should be %in% c('condition', 'sample', 'copy')")   

    comment(glb_newobs_df) <- "glb_newobs_df"
    myprint_df(glb_newobs_df)
    str(glb_newobs_df)

    if (glb_split_entity_newobs_datasets) {
        myprint_df(glb_trnobs_df)
        str(glb_trnobs_df)        
    }
}         
## [1] "Reading file ./data/eBayiPadTest.csv..."
## [1] "dimensions of data in ./data/eBayiPadTest.csv: 798 rows x 10 cols"
##                                                                                                  description
## 1                                                                                                   like new
## 2 Item is in great shape. I upgraded to the iPad Air 2 and don&#039;t need the mini any longer, even though 
## 3        This iPad is working and is tested 100%. It runs great. It is in good condition. Cracked digitizer.
## 4                                                                                                           
## 5        Grade A condition means that the Ipad is 100% working condition. Cosmetically 8/9 out of 10 - Will 
## 6                   Brand new factory sealed iPad in an OPEN BOX...THE BOX ITSELF IS HEAVILY DISTRESSED(see 
##   biddable startprice                condition cellular carrier   color
## 1        0     105.00                     Used        1    AT&T Unknown
## 2        0     195.00                     Used        0    None Unknown
## 3        0     219.99                     Used        0    None Unknown
## 4        1     100.00                     Used        0    None Unknown
## 5        0     210.99 Manufacturer refurbished        0    None   Black
## 6        0     514.95  New other (see details)        0    None    Gold
##   storage productline UniqueID
## 1      32      iPad 1    11862
## 2      16 iPad mini 2    11863
## 3      64      iPad 3    11864
## 4      16   iPad mini    11865
## 5      32      iPad 3    11866
## 6      64  iPad Air 2    11867
##                                                                                               description
## 1                                                                                                like new
## 142                                             iPad mini 1st gen wi-fi 16gb is in perfect working order.
## 309     In excellent condition. Minor scratches on the back. Screen in mint condition. Comes in original 
## 312 iPad is in Great condition, the screen is in great condition showing only a few minor scratches, the 
## 320                                                                   Good condition and fully functional
## 369                                                                                                      
##     biddable startprice condition cellular carrier   color storage
## 1          0     105.00      Used        1    AT&T Unknown      32
## 142        1       0.99      Used        0    None Unknown      16
## 309        0     200.00      Used        1    AT&T   Black      32
## 312        1       0.99      Used        0    None Unknown      16
## 320        1      60.00      Used        0    None   White      16
## 369        1     197.97      Used        0    None Unknown      64
##     productline UniqueID
## 1        iPad 1    11862
## 142   iPad mini    12003
## 309      iPad 3    12170
## 312 iPad mini 2    12173
## 320      iPad 1    12181
## 369 iPad mini 3    12230
##                                                                                              description
## 793  Crack on digitizer near top. Top line of digitizer does not respond to touch. Other than that, all 
## 794                                                                                                     
## 795                                                                                                     
## 796                                                                                                     
## 797                                                                                                     
## 798 Slightly Used. Includes everything you need plus a nice leather case!\nThere is a slice mark on the 
##     biddable startprice                condition cellular carrier   color
## 793        0     104.00 For parts or not working        1 Unknown   Black
## 794        0      95.00                     Used        1    AT&T Unknown
## 795        1     199.99 Manufacturer refurbished        0    None   White
## 796        0     149.99                     Used        0    None Unknown
## 797        0       7.99                      New  Unknown Unknown Unknown
## 798        0     139.00                     Used        1 Unknown   Black
##     storage productline UniqueID
## 793      16      iPad 2    12654
## 794      64      iPad 1    12655
## 795      16      iPad 4    12656
## 796      16      iPad 2    12657
## 797 Unknown      iPad 3    12658
## 798      32     Unknown    12659
## 'data.frame':    798 obs. of  10 variables:
##  $ description: chr  "like new" "Item is in great shape. I upgraded to the iPad Air 2 and don&#039;t need the mini any longer, even though " "This iPad is working and is tested 100%. It runs great. It is in good condition. Cracked digitizer." "" ...
##  $ biddable   : int  0 0 0 1 0 0 0 0 0 1 ...
##  $ startprice : num  105 195 220 100 211 ...
##  $ condition  : chr  "Used" "Used" "Used" "Used" ...
##  $ cellular   : chr  "1" "0" "0" "0" ...
##  $ carrier    : chr  "AT&T" "None" "None" "None" ...
##  $ color      : chr  "Unknown" "Unknown" "Unknown" "Unknown" ...
##  $ storage    : chr  "32" "16" "64" "16" ...
##  $ productline: chr  "iPad 1" "iPad mini 2" "iPad 3" "iPad mini" ...
##  $ UniqueID   : int  11862 11863 11864 11865 11866 11867 11868 11869 11870 11871 ...
##  - attr(*, "comment")= chr "glb_newobs_df"
## NULL
# Sanity checks on the imported data, then removal of user-specified columns.

# The raw response must be fully populated in the training observations
if ((num_nas <- sum(is.na(glb_trnobs_df[, glb_rsp_var_raw]))) > 0)
    stop("glb_trnobs_df$", glb_rsp_var_raw, " contains NAs for ", num_nas, " obs")

# Warn when either subset has as many rows as the combined data
if (nrow(glb_trnobs_df) == nrow(glb_allobs_df))
    warning("glb_trnobs_df same as glb_allobs_df")
if (nrow(glb_newobs_df) == nrow(glb_allobs_df))
    warning("glb_newobs_df same as glb_allobs_df")

if (length(glb_drop_vars) > 0) {
    warning("dropping vars: ", paste0(glb_drop_vars, collapse=", "))
    # drop=FALSE keeps each object a data.frame even if only one column
    # survives; without it the result collapses to a bare vector
    glb_allobs_df <- glb_allobs_df[, setdiff(names(glb_allobs_df), glb_drop_vars), 
                                   drop=FALSE]
    glb_trnobs_df <- glb_trnobs_df[, setdiff(names(glb_trnobs_df), glb_drop_vars), 
                                   drop=FALSE]
    glb_newobs_df <- glb_newobs_df[, setdiff(names(glb_newobs_df), glb_drop_vars), 
                                   drop=FALSE]
}

#stop(here"); sav_allobs_df <- glb_allobs_df # glb_allobs_df <- sav_allobs_df
# Tag every observation with its origin, then stack training and new
# observations into glb_allobs_df for easier manipulation
glb_trnobs_df$.src <- "Train"
glb_newobs_df$.src <- "Test"
glb_exclude_vars_as_features <- union(glb_exclude_vars_as_features, ".src")
glb_allobs_df <- myrbind_df(glb_trnobs_df, glb_newobs_df)
comment(glb_allobs_df) <- "glb_allobs_df"

# Without a user-supplied id variable, fall back to row names as identifiers
if (!length(glb_id_var)) {
    warning("using .rownames as identifiers for observations")
    glb_allobs_df$.rownames <- rownames(glb_allobs_df)
    glb_trnobs_df$.rownames <- rownames(glb_allobs_df)[glb_allobs_df$.src == "Train"]
    glb_newobs_df$.rownames <- rownames(glb_allobs_df)[glb_allobs_df$.src == "Test"]
    glb_id_var <- ".rownames"
}
# Identifiers must be unique across all observations
if (any(duplicated(glb_allobs_df[, glb_id_var, drop=FALSE])))
    stop(glb_id_var, " duplicated in glb_allobs_df")
glb_exclude_vars_as_features <- union(glb_exclude_vars_as_features, glb_id_var)

# Sort by identifier; from here on only glb_allobs_df is kept
glb_allobs_df <- orderBy(reformulate(glb_id_var), glb_allobs_df)
glb_newobs_df <- NULL
glb_trnobs_df <- NULL

# For Tableau
write.csv(glb_allobs_df, file="data/eBayiPadAll.csv", row.names=FALSE)

#stop(here")
# Observations removed outright, identified by their glb_id_var value
glb_drop_obs <- c(
    11234,  # sold=0; 2 other dups(10306, 11503) are sold=1
    11844   # sold=0; 3 other dups(11721, 11738, 11812) are sold=1
)
glb_allobs_df <- glb_allobs_df[!(glb_allobs_df[, glb_id_var] %in% glb_drop_obs), ]

# Make any data corrections here
# Observation 10986: set cellular to "1" and carrier to "T-Mobile"
glb_allobs_df[glb_allobs_df[, glb_id_var] == 10986, "cellular"] <- "1"
glb_allobs_df[glb_allobs_df[, glb_id_var] == 10986, "carrier"]  <- "T-Mobile"

# Check for duplicates by all features
# (gdata is loaded here; duplicated2, used just below, presumably comes from it)
require(gdata)
## Loading required package: gdata
## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.
## 
## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.
## 
## Attaching package: 'gdata'
## 
## The following object is masked from 'package:stats':
## 
##     nobs
## 
## The following object is masked from 'package:utils':
## 
##     object.size
#print(names(glb_allobs_df))
# Keep the rows flagged by duplicated2() when the comparison ignores the
# UniqueID, sold and .src columns
dup_allobs_df <- glb_allobs_df[
    duplicated2(
        glb_allobs_df[, !(names(glb_allobs_df) %in% c("UniqueID", "sold", ".src")),
                      drop=FALSE]
    ),
]
# Sort so that matching rows sit next to each other
dup_allobs_df <- orderBy(~ productline + description + startprice + biddable,
                         dup_allobs_df)
print(sprintf("Found %d duplicates by all features:", nrow(dup_allobs_df)))
## [1] "Found 304 duplicates by all features:"
myprint_df(dup_allobs_df)
##      description biddable startprice                condition cellular
## 1711                    1       0.99 For parts or not working  Unknown
## 2608                    1       0.99 For parts or not working  Unknown
## 293                     1       5.00                     Used  Unknown
## 478                     1       5.00                     Used  Unknown
## 385                     0      15.00                     Used        0
## 390                     0      15.00                     Used        0
##      carrier   color storage productline sold UniqueID  .src
## 1711 Unknown Unknown      16     Unknown    1    11711 Train
## 2608 Unknown Unknown      16     Unknown   NA    12608  Test
## 293  Unknown   White      16     Unknown    1    10293 Train
## 478  Unknown   White      16     Unknown    1    10478 Train
## 385     None   Black      16     Unknown    0    10385 Train
## 390     None   Black      16     Unknown    0    10390 Train
##      description biddable startprice                condition cellular
## 1956                    1       0.99                     Used        0
## 828                     1     249.97 Manufacturer refurbished        1
## 3                       0     199.99                     Used        0
## 1649                    0     209.00 For parts or not working  Unknown
## 2111                    1     200.00                     Used        0
## 172                     0     269.00                     Used        0
##      carrier      color storage productline sold UniqueID  .src
## 1956    None    Unknown      16      iPad 2   NA    11956  Test
## 828  Unknown      Black      64      iPad 2    0    10828 Train
## 3       None      White      16      iPad 4    1    10003 Train
## 1649 Unknown    Unknown      16    iPad Air    0    11649 Train
## 2111    None Space Gray      64 iPad mini 2   NA    12111  Test
## 172     None    Unknown      32 iPad mini 2    0    10172 Train
##      description biddable startprice condition cellular carrier color
## 8                       0     329.99       New        0    None White
## 660                     0     329.99       New        0    None White
## 319                     0     345.00       New        0    None  Gold
## 1886                    0     345.00       New        0    None  Gold
## 1363                    0     498.88       New        1 Verizon  Gold
## 1394                    0     498.88       New        1 Verizon  Gold
##      storage productline sold UniqueID  .src
## 8         16 iPad mini 3    0    10008 Train
## 660       16 iPad mini 3    0    10660 Train
## 319       16 iPad mini 3    1    10319 Train
## 1886      16 iPad mini 3   NA    11886  Test
## 1363      16 iPad mini 3    0    11363 Train
## 1394      16 iPad mini 3    0    11394 Train
# print(dup_allobs_df[, c(glb_id_var, glb_rsp_var_raw, 
#                          "description", "startprice", "biddable")])
# write.csv(dup_allobs_df[, c("UniqueID"), FALSE], "ebayipads_dups.csv", row.names=FALSE)

# Assign every duplicated observation to a group of identical feature values
# and tabulate, per group, how many members have sold == 0, 1 or NA.

# Collapse all columns except sold / UniqueID / .src into one "#"-separated key
dupobs_df <- tidyr::unite(dup_allobs_df, "allfeats", -c(sold, UniqueID, .src), sep="#")
# dupobs_df <- dplyr::group_by(dupobs_df, allfeats)
# dupobs_df <- dupobs_df[, "UniqueID", FALSE]
# dupobs_df <- ungroup(dupobs_df)
# 
# dupobs_df$.rownames <- row.names(dupobs_df)
# One row per distinct key; the row name of that row becomes the group id
grpobs_df <- data.frame(allfeats=unique(dupobs_df[, "allfeats"]))
grpobs_df$.grpid <- row.names(grpobs_df)
dupobs_df <- merge(dupobs_df, grpobs_df)

# dupobs_tbl <- table(dupobs_df$.grpid)
# print(max(dupobs_tbl))
# print(dupobs_tbl[which.max(dupobs_tbl)])
# print(dupobs_df[dupobs_df$.grpid == names(dupobs_tbl[which.max(dupobs_tbl)]), ])
# print(dupobs_df[dupobs_df$.grpid == 106, ])
# for (grpid in c(9, 17, 31, 36, 53))
#     print(dupobs_df[dupobs_df$.grpid == grpid, ])
# Long table of counts by (.grpid, sold); NA sold values are counted when present
dupgrps_df <- as.data.frame(table(dupobs_df$.grpid, dupobs_df$sold, useNA="ifany"))
names(dupgrps_df)[c(1,2)] <- c(".grpid", "sold")
# Group ids come back as a factor; make them numeric so they sort numerically
dupgrps_df$.grpid <- as.numeric(as.character(dupgrps_df$.grpid))
# Wide form: one row per group, one count column per sold value
dupgrps_df <- tidyr::spread(dupgrps_df, sold, Freq)
names(dupgrps_df)[-1] <- paste("sold", names(dupgrps_df)[-1], sep=".")
# Group size = sum of the count columns (everything except .grpid). rowSums is
# vectorised and, unlike iterating over 1:nrow(), is safe for a zero-row table;
# as.integer keeps the column type of the original per-row integer sums.
dupgrps_df$.freq <- as.integer(rowSums(dupgrps_df[, -1, drop=FALSE]))
myprint_df(orderBy(~-.freq, dupgrps_df))
##     .grpid sold.0 sold.1 sold.NA .freq
## 40      40      0      6       3     9
## 106    106      0      4       1     5
## 9        9      0      1       3     4
## 17      17      0      3       1     4
## 36      36      0      3       1     4
## 53      53      0      2       2     4
##     .grpid sold.0 sold.1 sold.NA .freq
## 10      10      0      2       0     2
## 42      42      0      1       1     2
## 57      57      1      0       1     2
## 66      66      1      0       1     2
## 91      91      0      1       1     2
## 101    101      0      1       1     2
##     .grpid sold.0 sold.1 sold.NA .freq
## 130    130      1      0       1     2
## 131    131      1      1       0     2
## 132    132      0      1       1     2
## 133    133      2      0       0     2
## 134    134      0      1       1     2
## 135    135      2      0       0     2
# Duplicate groups containing both sold == 0 and sold == 1 members
print("sold Conflicts:")
## [1] "sold Conflicts:"
print(dupgrps_df[which(with(dupgrps_df, (sold.0 > 0) & (sold.1 > 0))), ])
##     .grpid sold.0 sold.1 sold.NA .freq
## 4        4      1      1       0     2
## 22      22      1      1       0     2
## 23      23      1      1       0     2
## 74      74      1      1       0     2
## 83      83      1      1       0     2
## 84      84      1      1       0     2
## 95      95      1      1       0     2
## 102    102      1      1       0     2
## 109    109      1      1       0     2
## 111    111      1      1       0     2
## 122    122      1      1       0     2
## 131    131      1      1       0     2
#dupobs_df[dupobs_df$.grpid == 4, ]
# Halt when any duplicate group has both sold=0 and sold=1 members in unequal
#   numbers; the equal-count conflicts printed above do not trigger this.
if (any(with(dupgrps_df, (sold.0 > 0) & (sold.1 > 0) & (sold.0 != sold.1)),
        na.rm=TRUE)) {
    stop("Duplicate conflicts are resolvable")
}

print("Test & Train Groups:")
## [1] "Test & Train Groups:"
print(subset(dupgrps_df, (sold.NA > 0)))
##     .grpid sold.0 sold.1 sold.NA .freq
## 1        1      0      1       1     2
## 5        5      1      0       1     2
## 7        7      0      0       2     2
## 8        8      1      0       1     2
## 9        9      0      1       3     4
## 12      12      0      0       2     2
## 14      14      0      1       1     2
## 15      15      0      0       2     2
## 17      17      0      3       1     4
## 18      18      0      2       1     3
## 19      19      0      2       1     3
## 24      24      0      2       1     3
## 26      26      1      0       1     2
## 28      28      1      0       1     2
## 30      30      0      1       1     2
## 32      32      0      0       2     2
## 33      33      0      1       1     2
## 35      35      0      2       1     3
## 36      36      0      3       1     4
## 37      37      0      0       2     2
## 38      38      0      1       1     2
## 40      40      0      6       3     9
## 41      41      0      0       2     2
## 42      42      0      1       1     2
## 43      43      0      1       1     2
## 44      44      0      2       1     3
## 47      47      0      1       1     2
## 48      48      0      0       2     2
## 49      49      0      1       2     3
## 51      51      0      1       1     2
## 53      53      0      2       2     4
## 54      54      0      1       1     2
## 55      55      1      0       2     3
## 56      56      1      0       1     2
## 57      57      1      0       1     2
## 58      58      0      0       2     2
## 59      59      1      0       1     2
## 60      60      1      0       1     2
## 63      63      0      1       1     2
## 66      66      1      0       1     2
## 67      67      1      0       1     2
## 68      68      0      0       2     2
## 69      69      1      0       1     2
## 73      73      0      1       1     2
## 76      76      0      2       1     3
## 86      86      0      0       2     2
## 87      87      1      0       1     2
## 89      89      1      0       1     2
## 90      90      0      0       2     2
## 91      91      0      1       1     2
## 93      93      0      1       1     2
## 94      94      1      0       1     2
## 99      99      0      1       1     2
## 101    101      0      1       1     2
## 103    103      0      1       1     2
## 104    104      1      0       1     2
## 106    106      0      4       1     5
## 107    107      0      1       1     2
## 108    108      0      1       1     2
## 112    112      1      0       1     2
## 114    114      0      1       1     2
## 115    115      0      1       1     2
## 116    116      1      0       1     2
## 117    117      0      2       1     3
## 118    118      0      1       1     2
## 121    121      1      0       1     2
## 124    124      1      0       1     2
## 128    128      0      1       1     2
## 130    130      1      0       1     2
## 132    132      0      1       1     2
## 134    134      0      1       1     2
# Left-join the duplicate-group id onto every observation (rows without a match
#   in dupobs_df get NA) and keep .grpid out of the modelling features.
glb_allobs_df <- merge(x=glb_allobs_df,
                       y=dupobs_df[, c(glb_id_var, ".grpid")],
                       by=glb_id_var, all.x=TRUE)
glb_exclude_vars_as_features <- append(glb_exclude_vars_as_features, ".grpid",
                                       after=0)

# !_sp
# Bring in previously saved start-price predictions and derive the gap between
#   the listed and the predicted start price.
# The file is looked up as <glb_out_pfx>sp_predict.csv.
spd_allobs_df <- read.csv(paste0(glb_out_pfx, "sp_predict.csv"))
# The prediction file must hold exactly one row per observation
if (nrow(spd_allobs_df) != nrow(glb_allobs_df))
    stop("mismatches between spd_allobs_df & glb_allobs_df")
# No `by=` is given, so merge() joins on every column name the two frames share
# NOTE(review): if a shared column is numeric, an exact-equality join can drop
#   rows; the row-count check below would catch that - confirm the shared columns
mrg_allobs_df <- merge(glb_allobs_df, spd_allobs_df)
if (nrow(mrg_allobs_df) != nrow(glb_allobs_df))
    stop("mismatches between mrg_allobs_df & glb_allobs_df")
# Positive values mean the listing starts above the predicted start price
mrg_allobs_df$startprice.diff <- mrg_allobs_df$startprice -
                                 mrg_allobs_df$startprice.predict.
print(myplot_scatter(mrg_allobs_df, "startprice", "startprice.diff", 
                     colorcol_name = "biddable"))
## Warning in myplot_scatter(mrg_allobs_df, "startprice", "startprice.diff", :
## converting biddable to class:factor

print(myplot_histogram(mrg_allobs_df, "startprice.diff", 
                     fill_col_name = "biddable"))
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# Adopt the merged frame; startprice.diff stays available as a feature while
#   the two columns named below are excluded
glb_allobs_df <- mrg_allobs_df
glb_exclude_vars_as_features <- c(glb_exclude_vars_as_features, 
                                  "startprice.log", "startprice.predict.")
###                                  

#stop(here"); sav_allobs_df <- glb_allobs_df; glb_allobs_df <- sav_allobs_df
# Only for _sp
# print(table(glb_allobs_df$sold, glb_allobs_df$.src, useNA = "ifany"))
# print(table(glb_allobs_df$sold, glb_allobs_df$biddable, glb_allobs_df$.src, 
#             useNA = "ifany"))
# glb_allobs_df$.src <- "Test"
# glb_allobs_df[!is.na(glb_allobs_df$sold) & (glb_allobs_df$sold == 1), ".src"] <- "Train"
# print(table(glb_allobs_df$sold, glb_allobs_df$.src, useNA = "ifany"))
# print(table(glb_allobs_df$sold, glb_allobs_df$biddable, glb_allobs_df$.src, 
#             useNA = "ifany"))
###

glb_chunks_df <- myadd_chunk(glb_chunks_df, "inspect.data", major.inc=TRUE)
##          label step_major step_minor    bgn   end elapsed
## 1  import.data          1          0  8.653 12.56   3.907
## 2 inspect.data          2          0 12.560    NA      NA

Step 2.0: inspect data

#print(str(glb_allobs_df))
#View(glb_allobs_df)

# Print the class distribution of `var` for each data source (.src) in
#   glb_allobs_df: first the raw counts, then each row as proportions of its
#   row total.
#   var: name of the column whose values are tabulated
dsp_class_dstrb <- function(var) {
    counts_df <- mycreate_xtab_df(glb_allobs_df, c(".src", var))
    rownames(counts_df) <- counts_df$.src
    # Keep only the count columns; .src now lives in the row names
    counts_df <- counts_df[, setdiff(names(counts_df), ".src"), drop=FALSE]
    print(counts_df)
    row_totals <- rowSums(counts_df, na.rm=TRUE)
    print(counts_df / row_totals)
}    

# Performed repeatedly in other chunks
# Data sanity checks on glb_allobs_df:
#   1. histogram of the raw response column, one facet per data source (.src)
#   2. for classification problems, the class distribution per data source
#   3. the checks performed by mycheck_problem_data()
glb_chk_data <- function() {
    rsp_hist <- myplot_histogram(glb_allobs_df, glb_rsp_var_raw) + facet_wrap(~ .src)
    print(rsp_hist)
    
    if (glb_is_classification) {
        # Use the mapped response once it exists, else fall back to the raw one
        rsp_col <- ifelse(glb_rsp_var %in% names(glb_allobs_df), 
                          glb_rsp_var, glb_rsp_var_raw)
        dsp_class_dstrb(var=rsp_col)
    }
    mycheck_problem_data(glb_allobs_df)
}
glb_chk_data()
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Loading required package: reshape2

##       sold.0 sold.1 sold.NA
## Test      NA     NA     798
## Train    999    860      NA
##          sold.0    sold.1 sold.NA
## Test         NA        NA       1
## Train 0.5373857 0.4626143      NA
## [1] "numeric data missing in : "
## sold 
##  798 
## [1] "numeric data w/ 0s in : "
## biddable     sold 
##     1444      999 
## [1] "numeric data w/ Infs in : "
## named integer(0)
## [1] "numeric data w/ NaNs in : "
## named integer(0)
## [1] "string data missing in : "
## description   condition    cellular     carrier       color     storage 
##        1520           0           0           0           0           0 
## productline      .grpid 
##           0          NA
# Create new features that help diagnostics
# When a response-mapping function is configured, derive the modelling response
#   column from the raw response, verify the mapping and, for classification,
#   show the resulting class distribution.
if (!is.null(glb_map_rsp_raw_to_var)) {
    glb_allobs_df[[glb_rsp_var]] <-
        glb_map_rsp_raw_to_var(glb_allobs_df[[glb_rsp_var_raw]])
    mycheck_map_results(mapd_df=glb_allobs_df,
                        from_col_name=glb_rsp_var_raw,
                        to_col_name=glb_rsp_var)

    if (glb_is_classification) {
        dsp_class_dstrb(glb_rsp_var)
    }
}
## Loading required package: sqldf
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
## Loading required package: tcltk
##   sold sold.fctr  .n
## 1    0         N 999
## 2    1         Y 860
## 3   NA      <NA> 798
## Warning: Removed 1 rows containing missing values (position_stack).

##       sold.fctr.N sold.fctr.Y sold.fctr.NA
## Test           NA          NA          798
## Train         999         860           NA
##       sold.fctr.N sold.fctr.Y sold.fctr.NA
## Test           NA          NA            1
## Train   0.5373857   0.4626143           NA
# check distribution of all numeric data
# Plot each feature in `feats_vctr` against the response glb_rsp_var, using the
#   data in glb_allobs_df.
#   feats_vctr: character vector of column names in glb_allobs_df
# Classification problems get a box plot of the feature by response class;
#   regression problems get a smoothed scatter plot. If both flags are set the
#   classification plot is used, as before. Factor features are additionally
#   facetted by their own levels.
# Stops if neither glb_is_classification nor glb_is_regression is set.
#   Previously `gp` was left unassigned in that case, so the function either
#   failed with "object 'gp' not found" or silently printed a stale plot from
#   a previous iteration or from a global `gp`.
dsp_numeric_feats_dstrb <- function(feats_vctr) {
    for (feat in feats_vctr) {
        print(sprintf("feat: %s", feat))
        if (glb_is_classification) {
            gp <- myplot_box(df=glb_allobs_df, ycol_names=feat, xcol_name=glb_rsp_var)
        } else if (glb_is_regression) {
            gp <- myplot_scatter(df=glb_allobs_df, ycol_name=glb_rsp_var, xcol_name=feat,
                                 smooth=TRUE)
        } else {
            stop("dsp_numeric_feats_dstrb: neither glb_is_classification nor ",
                 "glb_is_regression is set; nothing to plot for feat: ", feat)
        }
        if (inherits(glb_allobs_df[, feat], "factor"))
            gp <- gp + facet_wrap(reformulate(feat))
        print(gp)
    }
}
# dsp_numeric_feats_dstrb(setdiff(names(glb_allobs_df), 
#                                  union(myfind_chr_cols_df(glb_allobs_df), 
#                                        c(glb_rsp_var_raw, glb_rsp_var))))                                      

# Add diagnostic features to `obs_df` and return the augmented frame.
#   obs_df: data frame to augment
#   ref_df: not used by the current body (defaults to glb_allobs_df)
# At present the only feature added is `.rnorm`, a standard-normal noise column
#   drawn after fixing the seed at 169, so repeated calls on frames with the same
#   number of rows give the same values.
add_new_diag_feats <- function(obs_df, ref_df=glb_allobs_df) {
    require(plyr)
    
    # Templates for further features to add inside the mutate() call below:
#         <col_name>.NA=is.na(<col_name>),

#         <col_name>.fctr=factor(<col_name>, 
#                     as.factor(union(obs_df$<col_name>, obs_twin_df$<col_name>))), 
#         <col_name>.fctr=relevel(factor(<col_name>, 
#                     as.factor(union(obs_df$<col_name>, obs_twin_df$<col_name>))),
#                                   "<ref_val>"), 
#         <col2_name>.fctr=relevel(factor(ifelse(<col1_name> == <val>, "<oth_val>", "<ref_val>")), 
#                               as.factor(c("R", "<ref_val>")),
#                               ref="<ref_val>"),

          # This doesn't work - use sapply instead
#         <col_name>.fctr_num=grep(<col_name>, levels(<col_name>.fctr)), 
#         
#         Date.my=as.Date(strptime(Date, "%m/%d/%y %H:%M")),
#         Year=year(Date.my),
#         Month=months(Date.my),
#         Weekday=weekdays(Date.my)

#         <col_name>=<table>[as.character(<col2_name>)],
#         <col_name>=as.numeric(<col2_name>),

#         <col_name> = trunc(<col2_name> / 100),

    set.seed(169)
    obs_df <- mutate(obs_df, .rnorm = rnorm(n=nrow(obs_df)))

    # If levels of a factor are different across obs_df & glb_newobs_df; predict.glm fails  
    # Transformations not handled by mutate
#     obs_df$<col_name>.fctr.num <- sapply(1:nrow(obs_df), 
#         function(row_ix) grep(obs_df[row_ix, "<col_name>"],
#                               levels(obs_df[row_ix, "<col_name>.fctr"])))
    
    #print(summary(obs_df))
    #print(sapply(names(obs_df), function(col) sum(is.na(obs_df[, col]))))
    obs_df
}
glb_allobs_df <- add_new_diag_feats(glb_allobs_df)
## Loading required package: plyr
require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## 
## The following objects are masked from 'package:gdata':
## 
##     combine, first, last
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#stop(here"); sav_allobs_df <- glb_allobs_df # glb_allobs_df <- sav_allobs_df
# Merge some <descriptor>
# glb_allobs_df$<descriptor>.my <- glb_allobs_df$<descriptor>
# glb_allobs_df[grepl("\\bAIRPORT\\b", glb_allobs_df$<descriptor>.my),
#               "<descriptor>.my"] <- "AIRPORT"
# glb_allobs_df$<descriptor>.my <-
#     plyr::revalue(glb_allobs_df$<descriptor>.my, c(
#         "ABANDONED BUILDING" = "OTHER",
#         "##"                      = "##"
#     ))
# print(<descriptor>_freq_df <- mycreate_sqlxtab_df(glb_allobs_df, c("<descriptor>.my")))
# # print(dplyr::filter(<descriptor>_freq_df, grepl("(MEDICAL|DENTAL|OFFICE)", <descriptor>.my)))
# # print(dplyr::filter(dplyr::select(glb_allobs_df, -<var.zoo>), 
# #                     grepl("STORE", <descriptor>.my)))
# glb_exclude_vars_as_features <- c(glb_exclude_vars_as_features, "<descriptor>")

# Check distributions of newly transformed / extracted vars
#   Enhancement: remove vars that were displayed earlier
dsp_numeric_feats_dstrb(feats_vctr=setdiff(names(glb_allobs_df), 
        c(myfind_chr_cols_df(glb_allobs_df), glb_rsp_var_raw, glb_rsp_var, 
          glb_exclude_vars_as_features)))
## [1] "feat: biddable"

## [1] "feat: startprice.diff"

## [1] "feat: .rnorm"

#   Convert factors to dummy variables
#   Build splines   require(splines); bsBasis <- bs(training$age, df=3)

#pairs(subset(glb_trnobs_df, select=-c(col_symbol)))
# Check for glb_newobs_df & glb_trnobs_df features range mismatches

# Other diagnostics:
# print(subset(glb_trnobs_df, <col1_name> == max(glb_trnobs_df$<col1_name>, na.rm=TRUE) & 
#                         <col2_name> <= mean(glb_trnobs_df$<col1_name>, na.rm=TRUE)))

# print(glb_trnobs_df[which.max(glb_trnobs_df$<col_name>),])

# print(<col_name>_freq_glb_trnobs_df <- mycreate_tbl_df(glb_trnobs_df, "<col_name>"))
# print(which.min(table(glb_trnobs_df$<col_name>)))
# print(which.max(table(glb_trnobs_df$<col_name>)))
# print(which.max(table(glb_trnobs_df$<col1_name>, glb_trnobs_df$<col2_name>)[, 2]))
# print(table(glb_trnobs_df$<col1_name>, glb_trnobs_df$<col2_name>))
# print(table(is.na(glb_trnobs_df$<col1_name>), glb_trnobs_df$<col2_name>))
# print(table(sign(glb_trnobs_df$<col1_name>), glb_trnobs_df$<col2_name>))
# print(mycreate_xtab_df(glb_trnobs_df, <col1_name>))
# print(mycreate_xtab_df(glb_trnobs_df, c(<col1_name>, <col2_name>)))
# print(<col1_name>_<col2_name>_xtab_glb_trnobs_df <- 
#   mycreate_xtab_df(glb_trnobs_df, c("<col1_name>", "<col2_name>")))
# <col1_name>_<col2_name>_xtab_glb_trnobs_df[is.na(<col1_name>_<col2_name>_xtab_glb_trnobs_df)] <- 0
# print(<col1_name>_<col2_name>_xtab_glb_trnobs_df <- 
#   mutate(<col1_name>_<col2_name>_xtab_glb_trnobs_df, 
#             <col3_name>=(<col1_name> * 1.0) / (<col1_name> + <col2_name>))) 
# print(mycreate_sqlxtab_df(glb_allobs_df, c("<col1_name>", "<col2_name>")))

# print(<col2_name>_min_entity_arr <- 
#    sort(tapply(glb_trnobs_df$<col1_name>, glb_trnobs_df$<col2_name>, min, na.rm=TRUE)))
# print(<col1_name>_na_by_<col2_name>_arr <- 
#    sort(tapply(glb_trnobs_df$<col1_name>.NA, glb_trnobs_df$<col2_name>, mean, na.rm=TRUE)))

# Other plots:
# print(myplot_box(df=glb_trnobs_df, ycol_names="<col1_name>"))
# print(myplot_box(df=glb_trnobs_df, ycol_names="<col1_name>", xcol_name="<col2_name>"))
# print(myplot_line(subset(glb_trnobs_df, Symbol %in% c("CocaCola", "ProcterGamble")), 
#                   "Date.POSIX", "StockPrice", facet_row_colnames="Symbol") + 
#     geom_vline(xintercept=as.numeric(as.POSIXlt("2003-03-01"))) +
#     geom_vline(xintercept=as.numeric(as.POSIXlt("1983-01-01")))        
#         )
# print(myplot_line(subset(glb_trnobs_df, Date.POSIX > as.POSIXct("2004-01-01")), 
#                   "Date.POSIX", "StockPrice") +
#     geom_line(aes(color=Symbol)) + 
#     coord_cartesian(xlim=c(as.POSIXct("1990-01-01"),
#                            as.POSIXct("2000-01-01"))) +     
#     coord_cartesian(ylim=c(0, 250)) +     
#     geom_vline(xintercept=as.numeric(as.POSIXlt("1997-09-01"))) +
#     geom_vline(xintercept=as.numeric(as.POSIXlt("1997-11-01")))        
#         )
# print(myplot_scatter(glb_allobs_df, "<col1_name>", "<col2_name>", smooth=TRUE))
# print(myplot_scatter(glb_allobs_df, "<col1_name>", "<col2_name>", colorcol_name="<Pred.fctr>") + 
#         geom_point(data=subset(glb_allobs_df, <condition>), 
#                     mapping=aes(x=<x_var>, y=<y_var>), color="red", shape=4, size=5) +
#         geom_vline(xintercept=84))

glb_chunks_df <- myadd_chunk(glb_chunks_df, "scrub.data", major.inc=FALSE)
##          label step_major step_minor   bgn   end elapsed
## 2 inspect.data          2          0 12.56 16.48    3.92
## 3   scrub.data          2          1 16.48    NA      NA

Step 2.1: scrub data

mycheck_problem_data(glb_allobs_df)
## [1] "numeric data missing in : "
##      sold sold.fctr 
##       798       798 
## [1] "numeric data w/ 0s in : "
## biddable     sold 
##     1444      999 
## [1] "numeric data w/ Infs in : "
## named integer(0)
## [1] "numeric data w/ NaNs in : "
## named integer(0)
## [1] "string data missing in : "
## description   condition    cellular     carrier       color     storage 
##        1520           0           0           0           0           0 
## productline      .grpid 
##           0          NA
# Diagnostic helper for strings that break text processing: prints the string
#   and reports where its non-ASCII bytes are.
#   x:               a single character string
#   maxStringLength: only the first maxStringLength bytes are inspected
# Returns (invisibly) the integer byte positions whose value is above 127;
#   integer(0) if there are none or if x is not a single non-NA string.
# The original body looped over 256 positions but never reported anything
#   because its only print() was commented out. Working on raw bytes avoids
#   calling substr() on a string that may be invalid in the current locale.
# Every byte above 127 is reported, so characters that are valid multibyte
#   sequences are listed too; treat the result as candidates to inspect.
findOffendingCharacter <- function(x, maxStringLength=256){  
  print(x)
  if (length(x) != 1 || is.na(x))
    return(invisible(integer(0)))

  byte_vals <- as.integer(charToRaw(as.character(x)))
  byte_vals <- head(byte_vals, maxStringLength)
  bad_pos <- which(byte_vals > 127L)
  if (length(bad_pos) > 0)
    print(sprintf("non-ASCII byte(s) at position(s): %s", 
                  paste(bad_pos, collapse=", ")))
  invisible(bad_pos)
}
# string_vector <- c("test", "Se\x96ora", "works fine")
# lapply(string_vector, findOffendingCharacter)
# lapply(glb_allobs_df$description[29], findOffendingCharacter)

# Print a frequency cross-tab of Headline.pfx, Headline and the response for the
#   observations picked by sel_obs(Headline.contains=str).
#   str: pattern handed to sel_obs() as Headline.contains
# NOTE(review): references Headline columns and sel_obs(), none of which are
#   visible in this section - confirm they exist before calling.
dsp_hdlxtab <- function(str) {
    xtab_cols <- c("Headline.pfx", "Headline", glb_rsp_var)
    print(mycreate_sqlxtab_df(glb_allobs_df[sel_obs(Headline.contains=str), ],
                              xtab_cols))
}
#dsp_hdlxtab("(1914)|(1939)")

# Print a frequency cross-tab of Headline.pfx, the three category columns and
#   the response for the observations picked by sel_obs(Headline.contains=str).
#   str: pattern handed to sel_obs() as Headline.contains
# NOTE(review): references Headline / NewsDesk / SectionName / SubsectionName
#   columns and sel_obs(), none of which are visible in this section - confirm
#   they exist before calling.
dsp_catxtab <- function(str) {
    xtab_cols <- c("Headline.pfx", "NewsDesk", "SectionName", "SubsectionName",
                   glb_rsp_var)
    print(mycreate_sqlxtab_df(glb_allobs_df[sel_obs(Headline.contains=str), ],
                              xtab_cols))
}
# dsp_catxtab("(1914)|(1939)")
# dsp_catxtab("19(14|39|64):")
# dsp_catxtab("19..:")

# Merge some categories
# glb_allobs_df$myCategory <-
#     plyr::revalue(glb_allobs_df$myCategory, c(      
#         "#Business Day#Dealbook"            = "Business#Business Day#Dealbook",
#         "#Business Day#Small Business"      = "Business#Business Day#Small Business",
#         "dummy" = "dummy"
#     ))

# ctgry_xtab_df <- orderBy(reformulate(c("-", ".n")),
#                           mycreate_sqlxtab_df(glb_allobs_df,
#     c("myCategory", "NewsDesk", "SectionName", "SubsectionName", glb_rsp_var)))
# myprint_df(ctgry_xtab_df)
# write.table(ctgry_xtab_df, paste0(glb_out_pfx, "ctgry_xtab.csv"), 
#             row.names=FALSE)

# ctgry_cast_df <- orderBy(~ -Y -NA, dcast(ctgry_xtab_df, 
#                        myCategory + NewsDesk + SectionName + SubsectionName ~ 
#                            Popular.fctr, sum, value.var=".n"))
# myprint_df(ctgry_cast_df)
# write.table(ctgry_cast_df, paste0(glb_out_pfx, "ctgry_cast.csv"), 
#             row.names=FALSE)

# print(ctgry_sum_tbl <- table(glb_allobs_df$myCategory, glb_allobs_df[, glb_rsp_var], 
#                              useNA="ifany"))

# Chi-squared test of association between membership in a selection of
#   observations and the raw response glb_rsp_var_raw.
#   ...: selection criteria, passed unchanged to sel_obs()
# Only observations with a non-missing response take part. Prints the
#   selected (1) / not selected (0) by response table and the chisq.test()
#   result, which is also returned invisibly.
# Changes from the earlier version:
#   * the response column is taken from glb_rsp_var_raw instead of a hard-coded
#     "Popular" column, in line with the rest of this file
#   * an empty selection no longer fails when .marker is added
#   * the join key is explicit, and only .marker is zero-filled
dsp_chisq.test <- function(...) {
    has_rsp <- !is.na(glb_allobs_df[, glb_rsp_var_raw])
    sel_df <- glb_allobs_df[sel_obs(...) & has_rsp, ]
    # rep() keeps this valid when nothing is selected (zero rows)
    sel_df$.marker <- rep(1, nrow(sel_df))
    ref_df <- glb_allobs_df[has_rsp, ]
    mrg_df <- merge(ref_df[, c(glb_id_var, glb_rsp_var_raw)],
                    sel_df[, c(glb_id_var, ".marker")], 
                    by=glb_id_var, all.x=TRUE)
    # Observations outside the selection have no marker after the left join
    mrg_df$.marker[is.na(mrg_df$.marker)] <- 0
    print(mrg_tbl <- table(mrg_df$.marker, mrg_df[, glb_rsp_var_raw]))
    print(sprintf("Rows:Selected; Cols:%s", glb_rsp_var_raw))
    #print(mrg_tbl)
    print(chisq.test(mrg_tbl))
}
# dsp_chisq.test(Headline.contains="[Ee]bola")
# dsp_chisq.test(Snippet.contains="[Ee]bola")
# dsp_chisq.test(Abstract.contains="[Ee]bola")

# print(mycreate_sqlxtab_df(glb_allobs_df[sel_obs(Headline.contains="[Ee]bola"), ], 
#                           c(glb_rsp_var, "NewsDesk", "SectionName", "SubsectionName")))

# print(table(glb_allobs_df$NewsDesk, glb_allobs_df$SectionName))
# print(table(glb_allobs_df$SectionName, glb_allobs_df$SubsectionName))
# print(table(glb_allobs_df$NewsDesk, glb_allobs_df$SectionName, glb_allobs_df$SubsectionName))

# glb_allobs_df$myCategory.fctr <- as.factor(glb_allobs_df$myCategory)
# glb_exclude_vars_as_features <- union(glb_exclude_vars_as_features, 
#                                       c("myCategory", "NewsDesk", "SectionName", "SubsectionName"))

print(table(glb_allobs_df$cellular, glb_allobs_df$carrier, useNA="ifany"))
##          
##           AT&T None Other Sprint T-Mobile Unknown Verizon
##   0          0 1593     0      0        0       0       0
##   1        288    0     4     36       28     172     196
##   Unknown    4    4     2      0        0     330       0
# glb_allobs_df[(glb_allobs_df$cellular %in% c("Unknown")) & 
#               (glb_allobs_df$carrier %in% c("AT&T", "Other")), 
#               c(glb_id_var, glb_rsp_var_raw, "description", "carrier", "cellular")]
# Resolve cellular == "Unknown" where the carrier column settles it:
#   carrier AT&T / Other -> cellular "1"
glb_allobs_df$cellular[(glb_allobs_df$cellular %in% "Unknown") &
                       (glb_allobs_df$carrier %in% c("AT&T", "Other"))] <- "1"
# glb_allobs_df[(glb_allobs_df$cellular %in% c("Unknown")) & 
#               (glb_allobs_df$carrier %in% c("None")), 
#               c(glb_id_var, glb_rsp_var_raw, "description", "carrier", "cellular")]
#   carrier None -> cellular "0"
glb_allobs_df$cellular[(glb_allobs_df$cellular %in% "Unknown") &
                       (glb_allobs_df$carrier %in% "None")] <- "0"
print(table(glb_allobs_df$cellular, glb_allobs_df$carrier, useNA="ifany"))
##          
##           AT&T None Other Sprint T-Mobile Unknown Verizon
##   0          0 1597     0      0        0       0       0
##   1        292    0     6     36       28     172     196
##   Unknown    0    0     0      0        0     330       0

Step 2.2: transform data

glb_chunks_df <- myadd_chunk(glb_chunks_df, "transform.data", major.inc=FALSE)
##            label step_major step_minor    bgn    end elapsed
## 3     scrub.data          2          1 16.480 17.229   0.749
## 4 transform.data          2          2 17.229     NA      NA
### Mapping dictionary
#sav_allobs_df <- glb_allobs_df; glb_allobs_df <- sav_allobs_df
# Apply a mapping dictionary to each variable listed in glb_map_vars.
#   The dictionary for a variable is imported from glb_map_urls[[<variable>]];
#   its first column is passed to mymap_codes() as the join column and its
#   second column as the target column (that name is also passed as the third
#   positional argument).
# NOTE(review): mymap_codes() is defined elsewhere; presumably it adds the
#   mapped column to glb_allobs_df - confirm against its definition.
if (!is.null(glb_map_vars)) {
    for (feat in glb_map_vars) {
        map_df <- myimport_data(url=glb_map_urls[[feat]], 
                                            comment="map_df", 
                                           print_diagn=TRUE)
        glb_allobs_df <- mymap_codes(glb_allobs_df, feat, names(map_df)[2], 
                                     map_df, map_join_col_name=names(map_df)[1], 
                                     map_tgt_col_name=names(map_df)[2])
    }
    # The original (unmapped) variables are not used as features
    glb_exclude_vars_as_features <- union(glb_exclude_vars_as_features, glb_map_vars)
}

### Forced Assignments
#stop(here"); sav_allobs_df <- glb_allobs_df; glb_allobs_df <- sav_allobs_df
# For every variable in glb_assign_vars create a "<variable>.my" copy and
#   overwrite selected values in the copy. The substitutions for a variable are
#   taken from glb_assign_pairs_lst[[<variable>]], whose $from and $to are
#   parallel vectors; an NA in $from targets the missing values of the variable.
#   Matching is always done against the original column, so substitutions do
#   not chain. The original variables are excluded from the features.
# Changes from the earlier version:
#   * seq_along() instead of 1:length(), so an empty $from does nothing
#   * rows are located with which(), so NAs in the variable neither turn the
#     count into NA nor break the subscripted assignment
#   * the NA count uses is.na() directly; no eval(parse()) and no call to
#     filter(), which is masked between packages in this session
for (feat in glb_assign_vars) {
    new_feat <- paste0(feat, ".my")
    print(sprintf("Forced Assignments for: %s -> %s...", feat, new_feat))
    glb_allobs_df[, new_feat] <- glb_allobs_df[, feat]
    
    pairs <- glb_assign_pairs_lst[[feat]]
    for (pair_ix in seq_along(pairs$from)) {
        from_val <- pairs$from[pair_ix]
        to_val <- pairs$to[pair_ix]
        
        if ((is.na(from_val)) && (is.na(to_val)))
            stop("what are you trying to do ???")
        
        # Row positions in the original column that match the `from` value
        match_rows <- if (is.na(from_val)) 
            which(is.na(glb_allobs_df[, feat])) else
            which(glb_allobs_df[, feat] == from_val)
        nobs <- length(match_rows)
        
        if (nobs > 0)
            glb_allobs_df[match_rows, new_feat] <- to_val
                    
        print(sprintf("    %s -> %s for %s obs", 
                      from_val, to_val, format(nobs, big.mark=",")))
    }

    glb_exclude_vars_as_features <- union(glb_exclude_vars_as_features, glb_assign_vars)
}

### Derivations using mapping functions
#stop(here"); sav_allobs_df <- glb_allobs_df; glb_allobs_df <- sav_allobs_df
# Create each feature named in glb_derive_vars by calling its mapping function.
#   glb_derive_lst[[<feature>]]$args  : names of the glb_allobs_df columns to pass
#   glb_derive_lst[[<feature>]]$mapfn : function called with those columns as
#                                       arguments named after the columns
# The argument container starts as list() and not NULL. Assigning into NULL
#   with [[<- can yield an atomic vector instead of a list when the value has
#   length one (e.g. a one-row frame), and do.call() needs a list. An empty
#   $args now calls mapfn with no arguments.
for (new_feat in glb_derive_vars) {
    print(sprintf("Creating new feature: %s...", new_feat))
    args_lst <- list()
    for (arg in glb_derive_lst[[new_feat]]$args) 
        args_lst[[arg]] <- glb_allobs_df[, arg]
    glb_allobs_df[, new_feat] <- do.call(glb_derive_lst[[new_feat]]$mapfn, args_lst)
}
## [1] "Creating new feature: idseq.my..."
## [1] "Creating new feature: prdline.my..."
## [1] "Creating new feature: startprice.log..."
## [1] "Creating new feature: descr.my..."
#stop(here")
#hex_vctr <- c("\n", "\211", "\235", "\317", "\333")
# Replace newlines and four specific high bytes in descr.my with a space, for a
#   fixed list of observation ids only.
# Despite the variable name, "\211", "\235", "\317" and "\333" are octal escapes
#   in R string syntax; the alternatives are joined into one regex with "|".
hex_regex <- paste0(c("\n", "\211", "\235", "\317", "\333"), collapse="|")
# NOTE(review): the ids below and the UniqueID column are hard-coded, while the
#   rest of this file refers to the id column through glb_id_var - confirm they
#   stay in sync if the dataset or id column changes.
for (obs_id in c(10029, 10948, 10136, 10178, 11514, 11904, 12157, 12210, 12659)) {
#     tmp_str <- unlist(strsplit(glb_allobs_df[row_pos, "descr.my"], ""))
#     glb_allobs_df[row_pos, "descr.my"] <- paste0(tmp_str[!tmp_str %in% hex_vctr],
#                                                          collapse="")
    # Locate the row(s) carrying this id, then rewrite their description
    row_pos <- which(glb_allobs_df$UniqueID == obs_id)
    glb_allobs_df[row_pos, "descr.my"] <- 
        gsub(hex_regex, " ", glb_allobs_df[row_pos, "descr.my"])
}

Step 3.0: extract features

#```{r extract_features, cache=FALSE, eval=!is.null(glb_txt_vars)}
glb_chunks_df <- myadd_chunk(glb_chunks_df, "extract.features", major.inc=TRUE)
##              label step_major step_minor    bgn    end elapsed
## 4   transform.data          2          2 17.229 17.867   0.638
## 5 extract.features          3          0 17.868     NA      NA
extract.features_chunk_df <- myadd_chunk(NULL, "extract.features_bgn")
##                  label step_major step_minor    bgn end elapsed
## 1 extract.features_bgn          1          0 17.874  NA      NA
# Options:
#   Select Tf, log(1 + Tf), Tf-IDF or BM25Tf-IDf

# Create new features that help prediction
# <col_name>.lag.2 <- lag(zoo(glb_trnobs_df$<col_name>), -2, na.pad=TRUE)
# glb_trnobs_df[, "<col_name>.lag.2"] <- coredata(<col_name>.lag.2)
# <col_name>.lag.2 <- lag(zoo(glb_newobs_df$<col_name>), -2, na.pad=TRUE)
# glb_newobs_df[, "<col_name>.lag.2"] <- coredata(<col_name>.lag.2)
# 
# glb_newobs_df[1, "<col_name>.lag.2"] <- glb_trnobs_df[nrow(glb_trnobs_df) - 1, 
#                                                    "<col_name>"]
# glb_newobs_df[2, "<col_name>.lag.2"] <- glb_trnobs_df[nrow(glb_trnobs_df), 
#                                                    "<col_name>"]
                                                   
# glb_allobs_df <- mutate(glb_allobs_df,
#     A.P.http=ifelse(grepl("http",Added,fixed=TRUE), 1, 0)
#                     )
# 
# glb_trnobs_df <- mutate(glb_trnobs_df,
#                     )
# 
# glb_newobs_df <- mutate(glb_newobs_df,
#                     )

#   Convert dates to numbers 
#       typically, dates come in as chars; 
#           so this must be done before converting chars to factors

#stop(here"); sav_allobs_df <- glb_allobs_df #; glb_allobs_df <- sav_allobs_df
# Date-derived features, built only when glb_date_vars is configured.
#   1. myextract_dates_df() supplies the date columns that are bound onto
#      glb_allobs_df; the raw date variables and their ".POSIX" versions are
#      excluded from the features.
#   2. For each date variable the data is put in time order and, for lags of
#      1, 2, 10 and 100 observations, "<var>.last<k>.log" is set to
#      log(1 + gap), where gap is the difference between the numeric timestamp
#      of the observation and that of the k-th previous one (0 when there is
#      no such observation).
#   3. The data is put back in glb_id_var order afterwards.
# Changes from the earlier version:
#   * stats::lag() is called explicitly; a bare lag() is masked once dplyr is
#     attached, and stats::lag is the generic that dispatches on the zoo series
#   * the four copy-pasted lag stanzas are one loop over the lag sizes
#   * the final cleanup removes only objects that exist, so it no longer warns
#     when this section is skipped, and it also covers last2
# NOTE(review): the 2012-12-01 and 2012-10-01 plot cut-offs are hard-coded -
#   confirm they suit the date range of the data in use.
if (!is.null(glb_date_vars)) {
    glb_allobs_df <- cbind(glb_allobs_df, 
        myextract_dates_df(df=glb_allobs_df, vars=glb_date_vars, 
                           id_vars=glb_id_var, rsp_var=glb_rsp_var))
    for (sfx in c("", ".POSIX"))
        glb_exclude_vars_as_features <- 
            union(glb_exclude_vars_as_features, 
                    paste(glb_date_vars, sfx, sep=""))

    for (feat in glb_date_vars) {
        glb_allobs_df <- orderBy(reformulate(paste0(feat, ".POSIX")), glb_allobs_df)
#         print(myplot_scatter(glb_allobs_df, xcol_name=paste0(feat, ".POSIX"),
#                              ycol_name=glb_rsp_var, colorcol_name=glb_rsp_var))
        print(myplot_scatter(glb_allobs_df[glb_allobs_df[, paste0(feat, ".POSIX")] >=
                                               strptime("2012-12-01", "%Y-%m-%d"), ], 
                             xcol_name=paste0(feat, ".POSIX"),
                             ycol_name=glb_rsp_var, colorcol_name=paste0(feat, ".wkend")))

        # Create features that measure the gap between previous timestamp in the data
        require(zoo)
        z <- zoo(as.numeric(as.POSIXlt(glb_allobs_df[, paste0(feat, ".POSIX")])))
        glb_allobs_df[, paste0(feat, ".zoo")] <- z
        print(head(glb_allobs_df[, c(glb_id_var, feat, paste0(feat, ".zoo"))]))
        print(myplot_scatter(glb_allobs_df[glb_allobs_df[,  paste0(feat, ".POSIX")] >
                                            strptime("2012-10-01", "%Y-%m-%d"), ], 
                            xcol_name=paste0(feat, ".zoo"), ycol_name=glb_rsp_var,
                            colorcol_name=glb_rsp_var))
        # Empty series indexed 1..n; merging with it pads the lagged difference
        #   back to one value per observation
        b <- zoo(, seq(nrow(glb_allobs_df)))
        
        for (lag_n in c(1, 2, 10, 100)) {
            lag_feat <- paste0(feat, ".last", lag_n, ".log")
            gap <- as.numeric(merge(z - stats::lag(z, -lag_n), b, all=TRUE))
            # The first lag_n observations have no predecessor that far back
            gap[is.na(gap)] <- 0
            glb_allobs_df[, lag_feat] <- log(1 + gap)
            print(gp <- myplot_box(df=glb_allobs_df[glb_allobs_df[, lag_feat] > 0, ], 
                                   ycol_names=lag_feat, 
                                   xcol_name=glb_rsp_var))
        }
        
        glb_allobs_df <- orderBy(reformulate(glb_id_var), glb_allobs_df)
        glb_exclude_vars_as_features <- union(glb_exclude_vars_as_features, 
                                                c(paste0(feat, ".zoo")))
        # all2$last3 = as.numeric(merge(z-lag(z, -3), b, all = TRUE))
        # all2$last5 = as.numeric(merge(z-lag(z, -5), b, all = TRUE))
        # all2$last10 = as.numeric(merge(z-lag(z, -10), b, all = TRUE))
        # all2$last20 = as.numeric(merge(z-lag(z, -20), b, all = TRUE))
        # all2$last50 = as.numeric(merge(z-lag(z, -50), b, all = TRUE))
        # 
        # 
        # # order table
        # all2 = all2[order(all2$id),]
        # 
        # ## fill in NAs
        # # count averages
        # na.avg = all2 %>% group_by(weekend, hour) %>% dplyr::summarise(
        #     last1=mean(last1, na.rm=TRUE),
        #     last3=mean(last3, na.rm=TRUE),
        #     last5=mean(last5, na.rm=TRUE),
        #     last10=mean(last10, na.rm=TRUE),
        #     last20=mean(last20, na.rm=TRUE),
        #     last50=mean(last50, na.rm=TRUE)
        # )
        # 
        # # fill in averages
        # na.merge = merge(all2, na.avg, by=c("weekend","hour"))
        # na.merge = na.merge[order(na.merge$id),]
        # for(i in c("last1", "last3", "last5", "last10", "last20", "last50")) {
        #     y = paste0(i, ".y")
        #     idx = is.na(all2[[i]])
        #     all2[idx,][[i]] <- na.merge[idx,][[y]]
        # }
        # rm(na.avg, na.merge, b, i, idx, n, pd, sec, sh, y, z)
    }
}
# Drop lag temporaries, including any left by an earlier run, if they exist
rm(list=intersect(c("last1", "last2", "last10", "last100", 
                    "lag_n", "lag_feat", "gap"), ls()))
## Warning in rm(last1, last10, last100): object 'last1' not found
## Warning in rm(last1, last10, last100): object 'last10' not found
## Warning in rm(last1, last10, last100): object 'last100' not found
#   Create factors of string variables
extract.features_chunk_df <- myadd_chunk(extract.features_chunk_df, 
            paste0("extract.features_", "factorize.str.vars"), major.inc=TRUE)
##                                 label step_major step_minor    bgn    end
## 1                extract.features_bgn          1          0 17.874 17.888
## 2 extract.features_factorize.str.vars          2          0 17.889     NA
##   elapsed
## 1   0.014
## 2      NA
#stop(here"); sav_allobs_df <- glb_allobs_df; #glb_allobs_df <- sav_allobs_df
print(str_vars <- myfind_chr_cols_df(glb_allobs_df))
##   description     condition      cellular       carrier         color 
## "description"   "condition"    "cellular"     "carrier"       "color" 
##       storage   productline          .src        .grpid    prdline.my 
##     "storage" "productline"        ".src"      ".grpid"  "prdline.my" 
##      descr.my 
##    "descr.my"
# Every character column that is neither excluded from modelling nor a free-text
# variable gets a companion "<var>.fctr" factor column; the original string
# column is then added to the exclusion list.
if (length(str_vars <- setdiff(str_vars, 
                               c(glb_exclude_vars_as_features, glb_txt_vars))) > 0) {
    for (var in str_vars) {
        warning("Creating factors of string variable: ", var, 
                ": # of unique values: ", length(unique(glb_allobs_df[, var])))
        # Reference level = most frequent non-missing value. NA is left out of the
        # tabulation on purpose: factor() does not create an NA level, so if NA
        # were the most frequent value relevel() would fail with
        # "'ref' must be an existing level".
        glb_allobs_df[, paste0(var, ".fctr")] <- 
            relevel(factor(glb_allobs_df[, var]),
                    names(which.max(table(glb_allobs_df[, var]))))
    }
    glb_exclude_vars_as_features <- union(glb_exclude_vars_as_features, str_vars)
}
## Warning: Creating factors of string variable: condition: # of unique
## values: 6
## Warning: Creating factors of string variable: cellular: # of unique values:
## 3
## Warning: Creating factors of string variable: carrier: # of unique values:
## 7
## Warning: Creating factors of string variable: color: # of unique values: 5
## Warning: Creating factors of string variable: storage: # of unique values:
## 5
if (!is.null(glb_txt_vars)) {
    # Packages used by the text-processing steps below. require() only warns and
    # returns FALSE when a package is missing, so a missing package surfaces later
    # as a "could not find function" error rather than at this point.
    require(foreach)
    require(gsubfn)
    require(stringr)
    require(tm)
    
    # Register the "process.text" step as a new major step in the chunk log
    # (myadd_chunk is defined elsewhere).
    extract.features_chunk_df <- myadd_chunk(extract.features_chunk_df, 
            paste0("extract.features_", "process.text"), major.inc=TRUE)
    
    # Frequency table of every substring of txt_vctr that matches rex_str.
    #   rex_str:     regular expression handed to stringr::regex()
    #   ignore.case: TRUE for case-insensitive matching
    # txt_vctr is a free variable resolved from the enclosing environment, so it
    # must be set before calling. The tabulation itself is delegated to
    # mycreate_sqlxtab_df() on a one-column ("pattern") data frame.
    chk_pattern_freq <- function(rex_str, ignore.case=TRUE) {
        rex_obj <- regex(rex_str, ignore_case=ignore.case)
        hits_mtrx <- str_extract_all(txt_vctr, rex_obj, simplify=TRUE)
        # simplify=TRUE pads short rows with ""; keep the real matches only
        hits_vctr <- hits_mtrx[hits_mtrx != ""]
        hits_df <- as.data.frame(hits_vctr)
        names(hits_df) <- "pattern"
        return(mycreate_sqlxtab_df(hits_df, "pattern"))
    }

#     match_lst <- gregexpr("\\bok(?!ay)", txt_vctr[746], ignore.case = FALSE, perl=TRUE); print(match_lst)
    # List, per element of txt_vctr, all matches of rex_str joined by "#".
    #   rex_str:     Perl-compatible regular expression
    #   ignore.case: TRUE for case-insensitive matching
    #   print.all:   TRUE to print the resulting data frame as well
    # Returns a one-column ("matches") data frame restricted to elements with at
    # least one match; its row names are the positions of those elements in
    # txt_vctr. txt_vctr is a free variable resolved from the enclosing environment.
    dsp_pattern <- function(rex_str, ignore.case=TRUE, print.all=TRUE) {
        hit_pos_lst <- gregexpr(rex_str, txt_vctr, ignore.case = ignore.case, perl=TRUE)
        hit_txt_lst <- regmatches(txt_vctr, hit_pos_lst)
        joined_vctr <- sapply(hit_txt_lst, 
                              function (hits) paste(hits, collapse="#"))
        match_df <- data.frame(matches=joined_vctr)
        # elements without any match collapse to ""; drop them
        match_df <- match_df[which(match_df$matches != ""), , drop=FALSE]
        if (print.all) {
            print(match_df)
        }
        return(match_df)
    }
    
    # Debug helper: print where rex_str matches in element ix of txt_vctr, then
    # print a substring of that element around each match.
    #   rex_str: Perl-compatible regular expression (matched case-sensitively)
    #   ix:      index into txt_vctr
    # txt_vctr is a free variable resolved from the enclosing environment.
    dsp_matches <- function(rex_str, ix) {
        print(match_pos <- gregexpr(rex_str, txt_vctr[ix], perl=TRUE))
        # The substring runs from 0.99 * position to position + 100.
        # NOTE(review): the start offset is fractional and scales with the match
        # position; this may have been meant as integer division (%/%) to snap to
        # a 100-character block -- confirm the intended window.
        print(str_sub(txt_vctr[ix], (match_pos[[1]] / 100) *  99 +   0, 
                                    (match_pos[[1]] / 100) * 100 + 100))        
    }

    # Apply every pattern -> replacement pair of gsub_map_lst to txt_vctr, in list
    # order, and return the rewritten vector.
    #   ...: extra arguments forwarded to gsub() (e.g. ignore.case, perl)
    # Both gsub_map_lst (a list whose names are the patterns and whose values are
    # the replacements) and txt_vctr are free variables resolved from the enclosing
    # environment; the caller's txt_vctr is not modified. Progress is printed on
    # every 10th pattern.
    myapply_gsub <- function(...) {
        ptn_vctr <- names(gsub_map_lst)
        n_ptns <- length(ptn_vctr)
        if (n_ptns == 0)
            return(txt_vctr)
        for (ix in seq_len(n_ptns)) {
            if ((ix %% 10) == 0)
                print(sprintf("running gsub for %02d (of %02d): #%s#...", ix, 
                                n_ptns, ptn_vctr[ix]))
            txt_vctr <- gsub(ptn_vctr[ix], gsub_map_lst[[ix]], txt_vctr, ...)
        }
        return(txt_vctr)
    }

    # Apply every (rex_str -> rpl_str) row of glb_txt_map_df to txt_vctr, in row
    # order, and return the rewritten vector.
    #   txt_vctr: character vector to clean
    #   ...:      extra arguments forwarded to gsub() (e.g. ignore.case, perl)
    # glb_txt_map_df is a free variable resolved from the enclosing environment; it
    # must have columns "rex_str" and "rpl_str". Progress is printed on every 10th
    # row.
    myapply_txtmap <- function(txt_vctr, ...) {
        nrows <- nrow(glb_txt_map_df)
        # seq_len() rather than 1:nrows: with an empty map 1:0 would iterate over
        # c(1, 0), replacing the text with NA on the first pass and failing on the
        # second. An empty map now returns txt_vctr unchanged.
        for (ptn_ix in seq_len(nrows)) {
            if ((ptn_ix %% 10) == 0)
                print(sprintf("running gsub for %02d (of %02d): #%s#...", ptn_ix, 
                                nrows, glb_txt_map_df[ptn_ix, "rex_str"]))
            txt_vctr <- gsub(glb_txt_map_df[ptn_ix, "rex_str"], 
                             glb_txt_map_df[ptn_ix, "rpl_str"], 
                               txt_vctr, ...)
        }
        return(txt_vctr)
    }

    # Debug helper: print whether elements bgn..end of the "Headline" text agree
    # between the saved copy (sav_txt_lst) and the current one (glb_txt_lst); both
    # lists are free variables resolved from the enclosing environment.
    chk.equal <- function(bgn, end) {
        ix_rng <- bgn:end
        sav_vctr <- sav_txt_lst[["Headline"]][ix_rng]
        cur_vctr <- glb_txt_lst[["Headline"]][ix_rng]
        print(all.equal(sav_vctr, cur_vctr))
    }
    # Debug helper: print elements bgn..end of the "Headline" text, first from the
    # saved copy (sav_txt_lst) and then from the current one (glb_txt_lst); both
    # lists are free variables resolved from the enclosing environment.
    dsp.equal <- function(bgn, end) {
        ix_rng <- bgn:end
        for (txt_lst in list(sav_txt_lst, glb_txt_lst))
            shown <- print(txt_lst[["Headline"]][ix_rng])
        invisible(shown)
    }
#sav_txt_lst <- glb_txt_lst; all.equal(sav_txt_lst, glb_txt_lst)
#all.equal(sav_txt_lst[["Headline"]][1:4200], glb_txt_lst[["Headline"]][1:4200])
#chk.equal( 1, 100)
#dsp.equal(86, 90)
    
    # Load the regex -> replacement map used to clean the raw text columns. The
    # file name is glb_txt_munge_filenames_pfx followed by "map.csv"; the run
    # stops if it is missing. Lines starting with "#" in the file are comments.
    txt_map_filename <- paste0(glb_txt_munge_filenames_pfx, "map.csv")
    if (!file.exists(txt_map_filename))
        stop(txt_map_filename, " not found!")
    glb_txt_map_df <- read.csv(txt_map_filename, comment.char="#", strip.white=TRUE)
    glb_txt_lst <- list(); 
    print(sprintf("Building glb_txt_lst..."))
    # One list element per text variable: that column of glb_allobs_df after
    # myapply_txtmap(). foreach collects the value of the last expression in the
    # body, i.e. the assignment to txt_vctr at the bottom.
    # NOTE(review): glb_txt_map_df is referenced only inside myapply_txtmap, so a
    # non-fork parallel backend may need it exported explicitly -- confirm.
    glb_txt_lst <- foreach(txt_var=glb_txt_vars) %dopar% {   
#     for (txt_var in glb_txt_vars) {
        txt_vctr <- glb_allobs_df[, txt_var]
        
        # myapply_txtmap shd be created as a tm_map::content_transformer ?
        #print(glb_txt_map_df)
        #txt_var=glb_txt_vars[3]; txt_vctr <- glb_txt_lst[[txt_var]]
        #print(rex_str <- glb_txt_map_df[3, "rex_str"])
        #print(rex_str <- glb_txt_map_df[glb_txt_map_df$rex_str == "\\bWall St\\.", "rex_str"])
        #print(rex_str <- glb_txt_map_df[grepl("du Pont", glb_txt_map_df$rex_str), "rex_str"])        
        #print(rex_str <- glb_txt_map_df[glb_txt_map_df$rpl_str == "versus", "rex_str"])             
        #print(tmp_vctr <- grep(rex_str, txt_vctr, value=TRUE, ignore.case=FALSE))
        #ret_lst <- regexec(rex_str, txt_vctr, ignore.case=FALSE); ret_lst <- regmatches(txt_vctr, ret_lst); ret_vctr <- sapply(1:length(ret_lst), function(pos_ix) ifelse(length(ret_lst[[pos_ix]]) > 0, ret_lst[[pos_ix]], "")); print(ret_vctr <- ret_vctr[ret_vctr != ""])
        #gsub(rex_str, glb_txt_map_df[glb_txt_map_df$rex_str == rex_str, "rpl_str"], tmp_vctr, ignore.case=FALSE)
        #grep("Hong Hong", txt_vctr, value=TRUE)
    
        txt_vctr <- myapply_txtmap(txt_vctr, ignore.case=FALSE)    
    }
    # foreach returns an unnamed list; key it by text variable
    names(glb_txt_lst) <- glb_txt_vars

    # For each cleaned text variable, report the remaining "OK" / "Ok" / "ok"
    # occurrences that are not part of a longer word (the look-behind / look-ahead
    # lists enumerate the surrounding letters to ignore). For each pattern: print
    # its frequency table, then show the context of every match.
    for (txt_var in glb_txt_vars) {
        print(sprintf("Remaining OK in %s:", txt_var))
        txt_vctr <- glb_txt_lst[[txt_var]]
        
        for (rex_str in c(
            "(?<!(BO|HO|LO))OK(?!(E\\!|ED|IE|IN|S ))",
            "Ok(?!(a\\.|ay|in|ra|um))",
            "(?<!( b| B| c| C| g| G| j| M| p| P| w| W| r| Z|\\(b|ar|bo|Bo|co|Co|Ew|gk|go|ho|ig|jo|kb|ke|Ke|ki|lo|Lo|mo|mt|no|No|po|ra|ro|sm|Sm|Sp|to|To))ok(?!(ay|bo|e |e\\)|e,|e\\.|eb|ed|el|en|er|es|ey|i |ie|in|it|ka|ke|ki|ly|on|oy|ra|st|u |uc|uy|yl|yo))"
        )) {
            print(chk_pattern_freq(rex_str, ignore.case=FALSE))
            match_df <- dsp_pattern(rex_str, ignore.case=FALSE, print.all=FALSE)
            # row names of match_df are the positions of the matching documents
            for (row in row.names(match_df))
                dsp_matches(rex_str, ix=as.numeric(row))
        }
    }    
    # txt_vctr <- glb_txt_lst[[glb_txt_vars[1]]]
    # print(chk_pattern_freq(rex_str <- "(?<!( b| c| C| p|\\(b|bo|co|lo|Lo|Sp|to|To))ok(?!(ay|e |e\\)|e,|e\\.|ed|el|en|es|ey|ie|in|on|ra))", ignore.case=FALSE))
    # print(chk_pattern_freq(rex_str <- "ok(?!(ay|el|on|ra))", ignore.case=FALSE))
    # dsp_pattern(rex_str, ignore.case=FALSE, print.all=FALSE)
    # dsp_matches(rex_str, ix=8)
    # substr(txt_vctr[86], 5613, 5620)
    # substr(glb_allobs_df[301, "review"], 550, 650)

#stop(here"); sav_txt_lst <- glb_txt_lst    
    # For each cleaned text variable, print frequency tables of dotted upper-case
    # sequences that the text map has not rewritten yet.
    for (txt_var in glb_txt_vars) {
        print(sprintf("Remaining Acronyms in %s:", txt_var))
        # chk_pattern_freq() reads txt_vctr from this environment
        txt_vctr <- glb_txt_lst[[txt_var]]
        
        # Two or more "<upper-case letter>." groups in a row, e.g. "U.S."
        print(chk_pattern_freq(rex_str <- "([[:upper:]]\\.( *)){2,}", ignore.case=FALSE))
        
        # Check for names
        # A single upper-case run followed by "."; only patterns seen more than
        # once (.n > 1) are shown.
        print(subset(chk_pattern_freq(rex_str <- "(([[:upper:]]+)\\.( *)){1}",
                                      ignore.case=FALSE),
                     .n > 1))
        # dsp_pattern(rex_str="(OK\\.( *)){1}", ignore.case=FALSE)
        # dsp_matches(rex_str="(OK\\.( *)){1}", ix=557)
        #dsp_matches(rex_str="\\bR\\.I\\.P(\\.*)(\\B)", ix=461)
        #dsp_matches(rex_str="\\bR\\.I\\.P(\\.*)", ix=461)        
        #print(str_sub(txt_vctr[676], 10100, 10200))
        #print(str_sub(txt_vctr[74], 1, -1))        
    }

    # For each cleaned text variable, list two-word terms that start with a common
    # place-name prefix (Fort, Hong, Las, New, San, St. ...) and whose second word
    # is capitalised, most frequent first.
    for (txt_var in glb_txt_vars) {
        re_str <- "\\b(Fort|Ft\\.|Hong|Las|Los|New|Puerto|Saint|San|St\\.)( |-)(\\w)+"
        print(sprintf("Remaining #%s# terms in %s: ", re_str, txt_var))
        # chk_pattern_freq() reads txt_vctr from this environment
        txt_vctr <- glb_txt_lst[[txt_var]]        
        # keep only matches where the separator is followed by an upper-case letter
        print(orderBy(~ -.n +pattern, subset(chk_pattern_freq(re_str, ignore.case=FALSE), 
                                             grepl("( |-)[[:upper:]]", pattern))))
        print("    consider cleaning if relevant to problem domain; geography name; .n > 1")
        #grep("New G", txt_vctr, value=TRUE, ignore.case=FALSE)
        #grep("St\\. Wins", txt_vctr, value=TRUE, ignore.case=FALSE)
    }        
        
#stop(here"); sav_txt_lst <- glb_txt_lst    
    # For each cleaned text variable, list terms made of a single compass-style
    # initial (N, S, E, W, C) followed by a space or "." and a word, most frequent
    # first.
    for (txt_var in glb_txt_vars) {
        re_str <- "\\b(N|S|E|W|C)( |\\.)(\\w)+"
        print(sprintf("Remaining #%s# terms in %s: ", re_str, txt_var))        
        # chk_pattern_freq() reads txt_vctr from this environment
        txt_vctr <- glb_txt_lst[[txt_var]]                
        # NOTE(review): grepl(".") matches any non-empty string, so this subset
        # filters nothing; a literal dot ("\\.") may have been intended -- confirm.
        print(orderBy(~ -.n +pattern, subset(chk_pattern_freq(re_str, ignore.case=FALSE), 
                                             grepl(".", pattern))))
        #grep("N Weaver", txt_vctr, value=TRUE, ignore.case=FALSE)        
    }    

    # For each cleaned text variable, list terms that start with a compass word
    # (North, South, East, West, Central) followed by a space or "." and a word,
    # most frequent first. Nothing is printed when there are no such terms.
    for (txt_var in glb_txt_vars) {
        re_str <- "\\b(North|South|East|West|Central)( |\\.)(\\w)+"
        print(sprintf("Remaining #%s# terms in %s: ", re_str, txt_var))        
        # chk_pattern_freq() reads txt_vctr from this environment
        txt_vctr <- glb_txt_lst[[txt_var]]
        # NOTE(review): grepl(".") matches any non-empty string, so this subset
        # filters nothing; a literal dot ("\\.") may have been intended -- confirm.
        if (nrow(filtered_df <- subset(chk_pattern_freq(re_str, ignore.case=FALSE), 
                                             grepl(".", pattern))) > 0)
            print(orderBy(~ -.n +pattern, filtered_df))
        #grep("Central (African|Bankers|Cast|Italy|Role|Spring)", txt_vctr, value=TRUE, ignore.case=FALSE)
        #grep("East (Africa|Berlin|London|Poland|Rivals|Spring)", txt_vctr, value=TRUE, ignore.case=FALSE)
        #grep("North (American|Korean|West)", txt_vctr, value=TRUE, ignore.case=FALSE)        
        #grep("South (Pacific|Street)", txt_vctr, value=TRUE, ignore.case=FALSE)
        #grep("St\\. Martins", txt_vctr, value=TRUE, ignore.case=FALSE)
    }    

    # Report the hyphenated (compound) terms of txt_vctr that are not matched by
    # any pattern in the compound-filter file
    # (<glb_txt_munge_filenames_pfx>compound.csv, column "rex_str").
    #
    #   txt_vctr: character vector of documents; its length must equal
    #             nrow(glb_allobs_df) because the term matrix rows are labelled
    #             with rownames(glb_allobs_df)
    # Prints the term-frequency matrix summary and the number of remaining
    # compound terms, then returns whatever myprint_df() returns for them.
    # Stops when the compound-filter file does not exist.
    find_cmpnd_wrds <- function(txt_vctr) {
        txt_corpus <- Corpus(VectorSource(txt_vctr))
        txt_corpus <- tm_map(txt_corpus, content_transformer(tolower), lazy=TRUE)
        txt_corpus <- tm_map(txt_corpus, PlainTextDocument, lazy=TRUE)
        # 'lazy' was previously supplied twice in this call, which R rejects with
        # "formal argument 'lazy' matched by multiple actual arguments".
        txt_corpus <- tm_map(txt_corpus, removePunctuation, 
                             preserve_intra_word_dashes=TRUE, lazy=TRUE)
        full_Tf_DTM <- DocumentTermMatrix(txt_corpus, 
                                          control=list(weighting=weightTf))
        print("   Full TermMatrix:"); print(full_Tf_DTM)
        full_Tf_mtrx <- as.matrix(full_Tf_DTM)
        rownames(full_Tf_mtrx) <- rownames(glb_allobs_df) # print unreadable otherwise
        full_Tf_vctr <- colSums(full_Tf_mtrx)
        names(full_Tf_vctr) <- dimnames(full_Tf_DTM)[[2]]
        #grep("year", names(full_Tf_vctr), value=TRUE)
        #which.max(full_Tf_mtrx[, "yearlong"])
        full_Tf_df <- as.data.frame(full_Tf_vctr)
        names(full_Tf_df) <- "Tf.full"
        full_Tf_df$term <- rownames(full_Tf_df)
        #full_Tf_df$freq.full <- colSums(full_Tf_mtrx != 0)
        full_Tf_df <- orderBy(~ -Tf.full, full_Tf_df)
        # rows are named by term, so selecting by the matching term strings keeps
        # exactly the terms that contain a dash
        cmpnd_Tf_df <- full_Tf_df[grep("-", full_Tf_df$term, value=TRUE) ,]
        
        txt_compound_filename <- paste0(glb_txt_munge_filenames_pfx, "compound.csv")
        if (!file.exists(txt_compound_filename))
            stop(txt_compound_filename, " not found!")
        filter_df <- read.csv(txt_compound_filename, comment.char="#", strip.white=TRUE)
        cmpnd_Tf_df$filter <- FALSE
        # Flag terms matched by any filter pattern; only terms not flagged yet are
        # re-tested. seq_len() keeps an empty filter file from iterating over
        # c(1, 0), which would put NA into the flag column.
        for (row_ix in seq_len(nrow(filter_df)))
            cmpnd_Tf_df[!cmpnd_Tf_df$filter, "filter"] <- 
            grepl(filter_df[row_ix, "rex_str"], 
                  cmpnd_Tf_df[!cmpnd_Tf_df$filter, "term"], ignore.case=TRUE)
        cmpnd_Tf_df <- subset(cmpnd_Tf_df, !filter)
        # Bug in tm_map(txt_corpus, removePunctuation, preserve_intra_word_dashes=TRUE) ???
        #   "net-a-porter" gets converted to "net-aporter"
        #grep("net-a-porter", txt_vctr, ignore.case=TRUE, value=TRUE)
        #grep("maser-laser", txt_vctr, ignore.case=TRUE, value=TRUE)
        #txt_corpus[[which(grepl("net-a-porter", txt_vctr, ignore.case=TRUE))]]
        #grep("\\b(across|longer)-(\\w)", cmpnd_Tf_df$term, ignore.case=TRUE, value=TRUE)
        #grep("(\\w)-(affected|term)\\b", cmpnd_Tf_df$term, ignore.case=TRUE, value=TRUE)
        
        print(sprintf("nrow(cmpnd_Tf_df): %d", nrow(cmpnd_Tf_df)))
        myprint_df(cmpnd_Tf_df)
    }

    # Register the compound-term reporting sub-step (minor increment) in the chunk
    # log.
    extract.features_chunk_df <- myadd_chunk(extract.features_chunk_df, 
            paste0("extract.features_", "process.text_reporting_compound_terms"), major.inc=FALSE)
    
    # The actual report (find_cmpnd_wrds) is commented out, so this loop only
    # prints a header and leaves txt_vctr set to the last text variable.
    for (txt_var in glb_txt_vars) {
        print(sprintf("Remaining compound terms in %s: ", txt_var))        
        txt_vctr <- glb_txt_lst[[txt_var]]                        
#         find_cmpnd_wrds(txt_vctr)
        #grep("thirty-five", txt_vctr, ignore.case=TRUE, value=TRUE)
        #rex_str <- glb_txt_map_df[grepl("hirty", glb_txt_map_df$rex_str), "rex_str"]
    }

    # Register the corpus-building step as a new major step in the chunk log.
    extract.features_chunk_df <- myadd_chunk(extract.features_chunk_df, 
            paste0("extract.features_", "build.corpus"), major.inc=TRUE)
    
    # Summarise a DocumentTermMatrix as one row per term.
    #   DTM: document-term matrix with one row per row of glb_allobs_df
    # Returns a data frame sorted by decreasing TfIdf, with columns in this order
    # (callers select them by position):
    #   TfIdf      column sum of the matrix for the term (named "TfIdf" whatever
    #              weighting DTM actually carries)
    #   term       the term itself
    #   freq       number of documents in which the term is non-zero
    #   pos        column position of the term in DTM
    #   cor.y      correlation of the term's column with
    #              as.numeric(glb_allobs_df[, glb_txt_cor_var]), pairwise complete
    #   cor.y.abs  absolute value of cor.y
    #   TfIdf.<v>  one column per unique value v of glb_txt_cor_var: the column sum
    #              restricted to documents with that value ("TfIdf.NA" covers the
    #              documents where it is missing)
    # glb_allobs_df and glb_txt_cor_var are free variables resolved from the
    # enclosing environment.
    get_DTM_terms <- function(DTM) {
        TfIdf_mtrx <- as.matrix(DTM)
        rownames(TfIdf_mtrx) <- rownames(glb_allobs_df) # print unreadable otherwise
        TfIdf_vctr <- colSums(TfIdf_mtrx)
        names(TfIdf_vctr) <- dimnames(DTM)[[2]]
        TfIdf_df <- as.data.frame(TfIdf_vctr)
        names(TfIdf_df) <- "TfIdf"
        TfIdf_df$term <- rownames(TfIdf_df)
        TfIdf_df$freq <- colSums(TfIdf_mtrx != 0)
        TfIdf_df$pos <- 1:nrow(TfIdf_df)
        
        TfIdf_df$cor.y <- cor(TfIdf_mtrx, as.numeric(glb_allobs_df[, glb_txt_cor_var]),
                            use="pairwise.complete.obs")
        TfIdf_df$cor.y.abs <- abs(TfIdf_df$cor.y)

        # Multiplying the matrix by a 0/1 vector of length nrow zeroes out the
        # documents outside the class before the column sums are taken.
        for (cls in unique(glb_allobs_df[, glb_txt_cor_var])) {
            if (!is.na(cls))
                TfIdf_df[, paste0("TfIdf.", as.character(cls))] <- 
                    colSums(TfIdf_mtrx * 
                            as.numeric(!is.na(glb_allobs_df[, glb_txt_cor_var]) &
                                        (glb_allobs_df[, glb_txt_cor_var] == cls))) else
                TfIdf_df[, paste0("TfIdf.", as.character(cls))] <- 
                    colSums(TfIdf_mtrx * 
                            as.numeric(is.na(glb_allobs_df[, glb_txt_cor_var])))
        }    
        
        # Check all calls to get_DTM_terms to change returned order assumption
        return(TfIdf_df <- orderBy(~ -TfIdf, TfIdf_df))
    }
    #plt_full_df <- get_DTM_terms(DTM=glb_full_DTM_lst[[txt_var]])
    
    # Per-term summary (see get_DTM_terms) of a corpus, computed on its
    # TfIdf-weighted document-term matrix.
    #   txt_corpus: tm corpus with one document per row of glb_allobs_df
    get_corpus_terms <- function(txt_corpus) {
        wgt_ctrl <- list(weighting=weightTfIdf)
        corpus_DTM <- DocumentTermMatrix(txt_corpus, control=wgt_ctrl)
        return(terms_df <- get_DTM_terms(corpus_DTM))
    }
    
#stop(here")    
    # Build one tm corpus per text variable from the cleaned text in glb_txt_lst:
    # convert to plain-text documents, lower-case, strip punctuation, then remove
    # the variable-specific stop words plus the English stop-word list. Stemming
    # is deliberately left for a later step.
    glb_corpus_lst <- list()
    print(sprintf("Building glb_corpus_lst..."))
    # foreach collects the value of the last evaluated expression in the body,
    # which is the removeWords assignment (everything after it is commented out).
    glb_corpus_lst <- foreach(txt_var=glb_txt_vars) %dopar% {   
    # for (txt_var in glb_txt_vars) {
        txt_corpus <- Corpus(VectorSource(glb_txt_lst[[txt_var]]))
        #tolower Not needed as of version 0.6.2 ?
        txt_corpus <- tm_map(txt_corpus, PlainTextDocument, lazy=FALSE)
        txt_corpus <- tm_map(txt_corpus, content_transformer(tolower), lazy=FALSE) #nuppr
        # removePunctuation does not replace with whitespace. Use a custom transformer ???
        txt_corpus <- tm_map(txt_corpus, removePunctuation, lazy=TRUE) #npnct<chr_ix>
#         txt_corpus <- tm_map(txt_corpus, content_transformer(function(x, pattern) gsub(pattern, "", x))   
        txt_corpus <- tm_map(txt_corpus, removeWords, 
                             c(glb_append_stop_words[[txt_var]], 
                               stopwords("english")), lazy=TRUE) #nstopwrds
        #print("StoppedWords:"); stopped_words_TfIdf_df <- inspect_terms(txt_corpus)
        #stopped_words_TfIdf_df[grepl("cond", stopped_words_TfIdf_df$term, ignore.case=TRUE), ]
        #txt_X_mtrx <- as.matrix(DocumentTermMatrix(txt_corpus, control=list(weighting=weightTfIdf)))
        #which(txt_X_mtrx[, 211] > 0)
        #glb_allobs_df[which(txt_X_mtrx[, 211] > 0), glb_txt_vars]        
        #txt_X_mtrx[2159, txt_X_mtrx[2159, ] > 0]
        
        # txt_corpus <- tm_map(txt_corpus, stemDocument, "english", lazy=TRUE) #Done below
        #txt_corpus <- tm_map(txt_corpus, content_transformer(stemDocument))        
        #print("StemmedWords:"); stemmed_words_TfIdf_df <- inspect_terms(txt_corpus)
        #stemmed_words_TfIdf_df[grepl("cond", stemmed_words_TfIdf_df$term, ignore.case=TRUE), ]
        #stm_X_mtrx <- as.matrix(DocumentTermMatrix(txt_corpus, control=list(weighting=weightTfIdf)))
        #glb_allobs_df[which((stm_X_mtrx[, 180] > 0) | (stm_X_mtrx[, 181] > 0)), glb_txt_vars]
        #glb_allobs_df[which((stm_X_mtrx[, 181] > 0)), glb_txt_vars]

        # glb_corpus_lst[[txt_var]] <- txt_corpus
    }
    # foreach returns an unnamed list; key it by text variable
    names(glb_corpus_lst) <- glb_txt_vars
    
#stop(here")        
    # For every text variable, capture term statistics twice -- after stop-word
    # removal and again after stemming -- and bind per-document summaries to
    # glb_allobs_df. The new columns are named "<P>.<stat>", where <P> is the
    # upper-cased first letter of the text variable:
    #   terms.n.post.stop / .post.stem        number of distinct terms present
    #   terms.n.post.stop.log / .post.stem.log   log(1 + count)
    #   TfIdf.sum.post.stop / .post.stem      row sum of the TfIdf matrix
    #   terms.n.stem.stop.Ratio, TfIdf.sum.stem.stop.Ratio   stem / stop ratios
    # Side effects: glb_corpus_lst[[txt_var]] is replaced by its stemmed version,
    # the glb_post_* lists are filled, and the two raw count columns are added to
    # glb_exclude_vars_as_features.
    glb_post_stop_words_terms_df_lst <- list(); 
    glb_post_stop_words_TfIdf_mtrx_lst <- list();     
    glb_post_stem_words_terms_df_lst <- list(); 
    glb_post_stem_words_TfIdf_mtrx_lst <- list();     
    for (txt_var in glb_txt_vars) {
        print(sprintf("    Top_n stop TfIDf terms for %s:", txt_var))
        # This impacts stemming probably due to lazy parameter
        print(myprint_df(full_TfIdf_df <- get_corpus_terms(glb_corpus_lst[[txt_var]]), 
                   glb_txt_top_n[[txt_var]]))
        glb_post_stop_words_terms_df_lst[[txt_var]] <- full_TfIdf_df
        TfIdf_stop_mtrx <- as.matrix(DocumentTermMatrix(glb_corpus_lst[[txt_var]], 
                                        control=list(weighting=weightTfIdf)))
        rownames(TfIdf_stop_mtrx) <- rownames(glb_allobs_df) # print unreadable otherwise
        glb_post_stop_words_TfIdf_mtrx_lst[[txt_var]] <- TfIdf_stop_mtrx
        
        # Scratch frame: id and response first (dropped again below), then the
        # per-document statistics.
        tmp_allobs_df <- glb_allobs_df[, c(glb_id_var, glb_rsp_var)]
        tmp_allobs_df$terms.n.post.stop <- rowSums(TfIdf_stop_mtrx > 0)
        tmp_allobs_df$terms.n.post.stop.log <- log(1 + tmp_allobs_df$terms.n.post.stop)
        tmp_allobs_df$TfIdf.sum.post.stop <- rowSums(TfIdf_stop_mtrx)        
        
        print(sprintf("    Top_n stem TfIDf terms for %s:", txt_var))        
        glb_corpus_lst[[txt_var]] <- tm_map(glb_corpus_lst[[txt_var]], stemDocument,
                                            "english", lazy=TRUE) #Features ???
        print(myprint_df(full_TfIdf_df <- get_corpus_terms(glb_corpus_lst[[txt_var]]), 
                   glb_txt_top_n[[txt_var]]))
        glb_post_stem_words_terms_df_lst[[txt_var]] <- full_TfIdf_df        
        TfIdf_stem_mtrx <- as.matrix(DocumentTermMatrix(glb_corpus_lst[[txt_var]], 
                                        control=list(weighting=weightTfIdf)))
        rownames(TfIdf_stem_mtrx) <- rownames(glb_allobs_df) # print unreadable otherwise
        glb_post_stem_words_TfIdf_mtrx_lst[[txt_var]] <- TfIdf_stem_mtrx
        
        tmp_allobs_df$terms.n.post.stem <- rowSums(TfIdf_stem_mtrx > 0)
        tmp_allobs_df$terms.n.post.stem.log <- log(1 + tmp_allobs_df$terms.n.post.stem)
        tmp_allobs_df$TfIdf.sum.post.stem <- rowSums(TfIdf_stem_mtrx)
        
        # Ratios are 0/0 = NaN for documents without any term; treat those as
        # "no change" (1.0).
        tmp_allobs_df$terms.n.stem.stop.Ratio <- 
            1.0 * tmp_allobs_df$terms.n.post.stem / tmp_allobs_df$terms.n.post.stop
        tmp_allobs_df[is.nan(tmp_allobs_df$terms.n.stem.stop.Ratio), 
                      "terms.n.stem.stop.Ratio"] <- 1.0                
        tmp_allobs_df$TfIdf.sum.stem.stop.Ratio <- 
            1.0 * tmp_allobs_df$TfIdf.sum.post.stem / tmp_allobs_df$TfIdf.sum.post.stop
        tmp_allobs_df[is.nan(tmp_allobs_df$TfIdf.sum.stem.stop.Ratio), 
                      "TfIdf.sum.stem.stop.Ratio"] <- 1.0                
        
        # Correlation of each statistic with the response, on the rows where the
        # response is known.
        tmp_trnobs_df <- tmp_allobs_df[!is.na(tmp_allobs_df[, glb_rsp_var]), ]
        print(cor(as.matrix(tmp_trnobs_df[, -c(1, 2)]), 
                  as.numeric(tmp_trnobs_df[, glb_rsp_var])))
        
        txt_var_pfx <- toupper(substr(txt_var, 1, 1))
        tmp_allobs_df <- tmp_allobs_df[, -c(1, 2)]
        names(tmp_allobs_df) <- paste(paste0(txt_var_pfx, "."), names(tmp_allobs_df),
                                      sep="")
        glb_allobs_df <- cbind(glb_allobs_df, tmp_allobs_df)
        # Exclude the raw counts (their .log versions stay available). The names
        # must be built with "." as separator to match the columns created above;
        # paste()'s default " " separator produced "D terms.n.post.stop", which
        # matched no column, so nothing was actually excluded.
        glb_exclude_vars_as_features <- union(glb_exclude_vars_as_features, 
            paste0(txt_var_pfx, ".", c("terms.n.post.stop", "terms.n.post.stem")))
    }
    
    # Register the DTM extraction step as a new major step in the chunk log.
    extract.features_chunk_df <- myadd_chunk(extract.features_chunk_df, 
            paste0("extract.features_", "extract.DTM"), major.inc=TRUE)

#stop(here")    
    # For every text variable keep two TfIdf-weighted document-term matrices of
    # its (by now stemmed) corpus: the full one and one with sparse terms removed
    # at the variable's threshold in glb_sprs_thresholds.
    glb_full_DTM_lst <- list(); glb_sprs_DTM_lst <- list();
    for (txt_var in glb_txt_vars) {
        print(sprintf("Extracting TfIDf terms for %s...", txt_var))        
        txt_corpus <- glb_corpus_lst[[txt_var]]
        
#         full_Tf_DTM <- DocumentTermMatrix(txt_corpus, 
#                                           control=list(weighting=weightTf))
        full_TfIdf_DTM <- DocumentTermMatrix(txt_corpus, 
                                          control=list(weighting=weightTfIdf))
        sprs_TfIdf_DTM <- removeSparseTerms(full_TfIdf_DTM, 
                                            glb_sprs_thresholds[txt_var])
        
#         glb_full_DTM_lst[[txt_var]] <- full_Tf_DTM
#         glb_sprs_DTM_lst[[txt_var]] <- sprs_Tf_DTM
        glb_full_DTM_lst[[txt_var]] <- full_TfIdf_DTM
        glb_sprs_DTM_lst[[txt_var]] <- sprs_TfIdf_DTM
    }

    # Register the DTM reporting step as a new major step in the chunk log.
    extract.features_chunk_df <- myadd_chunk(extract.features_chunk_df, 
            paste0("extract.features_", "report.DTM"), major.inc=TRUE)

    # For every text variable, compare the full and the sparse document-term
    # matrices term by term and plot the result. Nothing in glb_allobs_df changes.
    require(reshape2)
    for (txt_var in glb_txt_vars) {
        print(sprintf("Reporting TfIDf terms for %s...", txt_var))        
        full_TfIdf_DTM <- glb_full_DTM_lst[[txt_var]]
        sprs_TfIdf_DTM <- glb_sprs_DTM_lst[[txt_var]]        

        print("   Full TermMatrix:"); print(full_TfIdf_DTM)
        full_TfIdf_df <- get_DTM_terms(full_TfIdf_DTM)
        # keep term, TfIdf, freq, pos (in that order) and suffix all but "term"
        # with ".full"
        full_TfIdf_df <- full_TfIdf_df[, c(2, 1, 3, 4)]
        col_names <- names(full_TfIdf_df)
        col_names[2:length(col_names)] <- 
            paste(col_names[2:length(col_names)], ".full", sep="")
        names(full_TfIdf_df) <- col_names
#         full_TfIdf_mtrx <- as.matrix(full_TfIdf_DTM)
#         rownames(full_TfIdf_mtrx) <- rownames(glb_allobs_df) # print undreadable otherwise
#         full_TfIdf_vctr <- colSums(full_TfIdf_mtrx)
#         names(full_TfIdf_vctr) <- dimnames(full_TfIdf_DTM)[[2]]
#         full_TfIdf_df <- as.data.frame(full_TfIdf_vctr)
#         names(full_TfIdf_df) <- "TfIdf.full"
#         full_TfIdf_df$term <- rownames(full_TfIdf_df)
#         full_TfIdf_df$freq.full <- colSums(full_TfIdf_mtrx != 0)
#         full_TfIdf_df <- orderBy(~ -TfIdf.full, full_TfIdf_df)

        print("   Sparse TermMatrix:"); print(sprs_TfIdf_DTM)
        sprs_TfIdf_df <- get_DTM_terms(sprs_TfIdf_DTM)
        # same column selection for the sparse matrix, suffixed with ".sprs"
        sprs_TfIdf_df <- sprs_TfIdf_df[, c(2, 1, 3, 4)]
        col_names <- names(sprs_TfIdf_df)
        col_names[2:length(col_names)] <- 
            paste(col_names[2:length(col_names)], ".sprs", sep="")
        names(sprs_TfIdf_df) <- col_names
#         sprs_TfIdf_vctr <- colSums(as.matrix(sprs_TfIdf_DTM))
#         names(sprs_TfIdf_vctr) <- dimnames(sprs_TfIdf_DTM)[[2]]
#         sprs_TfIdf_df <- as.data.frame(sprs_TfIdf_vctr)
#         names(sprs_TfIdf_df) <- "TfIdf.sprs"
#         sprs_TfIdf_df$term <- rownames(sprs_TfIdf_df)
#         sprs_TfIdf_df$freq.sprs <- colSums(as.matrix(sprs_TfIdf_DTM) != 0)        
#         sprs_TfIdf_df <- orderBy(~ -TfIdf.sprs, sprs_TfIdf_df)
        
        # Left join on the only shared column ("term"): terms dropped by the sparse
        # filter get NA in the *.sprs columns.
        terms_TfIdf_df <- merge(full_TfIdf_df, sprs_TfIdf_df, all.x=TRUE)
        terms_TfIdf_df$in.sprs <- !is.na(terms_TfIdf_df$freq.sprs)
        # Plot only terms whose total TfIdf is at least that of the weakest term
        # that survived the sparse filter; label those that did not survive.
        plt_TfIdf_df <- subset(terms_TfIdf_df, 
                               TfIdf.full >= min(terms_TfIdf_df$TfIdf.sprs, na.rm=TRUE))
        plt_TfIdf_df$label <- ""
        plt_TfIdf_df[is.na(plt_TfIdf_df$TfIdf.sprs), "label"] <- 
            plt_TfIdf_df[is.na(plt_TfIdf_df$TfIdf.sprs), "term"]
#         glb_important_terms[[txt_var]] <- union(glb_important_terms[[txt_var]],
#             plt_TfIdf_df[is.na(plt_TfIdf_df$TfIdf.sprs), "term"])
        print(myplot_scatter(plt_TfIdf_df, "freq.full", "TfIdf.full", 
                             colorcol_name="in.sprs") + 
                  geom_text(aes(label=label), color="Black", size=3.5))
        
        # Empirical CDF of every per-term metric, with the sparsity threshold
        # drawn as a dotted line.
        melt_TfIdf_df <- orderBy(~ -value, melt(terms_TfIdf_df, id.var="term"))
        print(ggplot(melt_TfIdf_df, aes(value, color=variable)) + stat_ecdf() + 
                  geom_hline(yintercept=glb_sprs_thresholds[txt_var], 
                             linetype = "dotted"))
        
        # Bar chart of the terms that survived the sparse filter ...
        melt_TfIdf_df <- orderBy(~ -value, 
                        melt(subset(terms_TfIdf_df, !is.na(TfIdf.sprs)), id.var="term"))
        print(myplot_hbar(melt_TfIdf_df, "term", "value", 
                          colorcol_name="variable"))
        
        # ... and of the 10 largest values among the terms that did not.
        melt_TfIdf_df <- orderBy(~ -value, 
                        melt(subset(terms_TfIdf_df, is.na(TfIdf.sprs)), id.var="term"))
        print(myplot_hbar(head(melt_TfIdf_df, 10), "term", "value", 
                          colorcol_name="variable"))
    }

#     sav_full_DTM_lst <- glb_full_DTM_lst
#     sav_sprs_DTM_lst <- glb_sprs_DTM_lst
#     print(identical(sav_glb_corpus_lst, glb_corpus_lst))
#     print(all.equal(length(sav_glb_corpus_lst), length(glb_corpus_lst)))
#     print(all.equal(names(sav_glb_corpus_lst), names(glb_corpus_lst)))
#     print(all.equal(sav_glb_corpus_lst[["Headline"]], glb_corpus_lst[["Headline"]]))

#     print(identical(sav_full_DTM_lst, glb_full_DTM_lst))
#     print(identical(sav_sprs_DTM_lst, glb_sprs_DTM_lst))
        
    # Remove the reporting temporaries, but only those that exist: the assignment
    # of full_TfIdf_mtrx is commented out above, so naming it directly in rm()
    # raised an "object ... not found" warning.
    rm(list = intersect(c("full_TfIdf_mtrx", "full_TfIdf_df", "melt_TfIdf_df", 
                          "terms_TfIdf_df"), ls()))

    # Create txt features
    # Text-derived columns are prefixed with the upper-cased first letter of their
    # text variable, so two text variables sharing a first letter would collide.
    if ((length(glb_txt_vars) > 1) &&
        (length(unique(pfxs <- sapply(glb_txt_vars, 
                    function(txt) toupper(substr(txt, 1, 1))))) < length(glb_txt_vars)))
            # collapse with a separator: stop() pastes its arguments together with
            # no separator, which rendered the prefixes as one run of letters
            stop("Prefixes for corpus freq terms not unique: ", 
                 paste(pfxs, collapse=", "))
    
    # Register the DTM binding step as a new major step in the chunk log.
    extract.features_chunk_df <- myadd_chunk(extract.features_chunk_df, 
                            paste0("extract.features_", "bind.DTM"), 
                                         major.inc=TRUE)
#stop(here"); sav_allobs_df <- glb_allobs_df; glb_allobs_df <- sav_allobs_df            
    # For every text variable: plot TfIdf against correlation with the response
    # for all terms, select terms according to glb_txt_filter_terms, add the terms
    # associated with them (findAssocs, correlation limit 0.2) and bind the
    # selected TfIdf columns to glb_allobs_df as "<P>.T.<term>", where <P> is the
    # upper-cased first letter of the text variable.
    require(tidyr)
    for (txt_var in glb_txt_vars) {
        print(sprintf("Binding DTM for %s...", txt_var))
        txt_var_pfx <- toupper(substr(txt_var, 1, 1))
        
        txt_full_X_df <- as.data.frame(as.matrix(glb_full_DTM_lst[[txt_var]]))
        terms_full_df <- get_DTM_terms(glb_full_DTM_lst[[txt_var]])        
        colnames(txt_full_X_df) <- paste(txt_var_pfx, ".T.",
                                    make.names(colnames(txt_full_X_df)), sep="")
        rownames(txt_full_X_df) <- rownames(glb_allobs_df) # warning otherwise
        
        # Long format for plotting: one row per (term, TfIdf column), where the
        # TfIdf columns are the overall sum ("TfIdf.all") and the per-class sums.
        plt_full_df <- terms_full_df
        names(plt_full_df)[grepl("TfIdf$", names(plt_full_df))] <- "TfIdf.all"
    #     gather(plt_full_df[1:5, ], domain, TfIdf, -matches("!(TfIdf)"))
    #     gather(plt_full_df[1:5, grepl("TfIdf", names(plt_full_df))], domain, TfIdf) 
    #     gather(plt_full_df[1:5, ], domain, TfIdf, 
    #            -names(plt_full_df)[!grepl("TfIdf", names(plt_full_df))]) 
        plt_full_df <- gather(plt_full_df, domain, TfIdf, 
                              -c(term, freq, pos, cor.y, cor.y.abs))
        # Label the top-n terms by TfIdf and the top-n by absolute correlation.
        plt_full_df$label <- NA
        top_val_terms <- orderBy(~-TfIdf, terms_full_df)$term[1:glb_txt_top_n[[txt_var]]]
        plt_full_df[plt_full_df$term %in% top_val_terms, "label"] <- 
            plt_full_df[plt_full_df$term %in% top_val_terms, "term"]
        top_cor_terms <- orderBy(~-cor.y.abs,
                                 terms_full_df)$term[1:glb_txt_top_n[[txt_var]]]
        plt_full_df[plt_full_df$term %in% top_cor_terms, "label"] <- 
            plt_full_df[plt_full_df$term %in% top_cor_terms, "term"]
        print(ggplot(plt_full_df, aes(x=TfIdf, y=cor.y)) + facet_wrap(~ domain) + 
                  geom_point(aes(size=freq), color="grey") + 
                  geom_jitter() + 
                  geom_text(aes(label=label), color="NavyBlue", size=3.5))

        if (glb_txt_filter_terms == "sparse") {
            txt_X_df <- as.data.frame(as.matrix(glb_sprs_DTM_lst[[txt_var]]))
            # Keep the raw DTM terms here, as the other two branches do:
            # select_terms is matched against terms_full_df$term and passed to
            # findAssocs(), both of which use the raw terms. Passing them through
            # make.names() silently dropped every term it alters (e.g. terms
            # starting with a digit, or reserved words such as "next").
            select_terms <- colnames(txt_X_df)
#             colnames(txt_X_df) <- paste(txt_var_pfx, ".T.",
#                                         make.names(colnames(txt_X_df)), sep="")
#             rownames(txt_X_df) <- rownames(glb_allobs_df) # warning otherwise
        } else if (glb_txt_filter_terms == "top.val") {
            select_terms <- orderBy(~-TfIdf,
                                    terms_full_df)$term[1:glb_txt_top_n[[txt_var]]]
#             txt_X_df <- txt_full_X_df[, subset(terms_full_df, term %in% select_terms)$pos,
#                                       FALSE]
        } else if (glb_txt_filter_terms == "top.cor") {
            select_terms <- orderBy(~-cor.y.abs,
                                    terms_full_df)$term[1:glb_txt_top_n[[txt_var]]]
#             txt_X_df <- txt_full_X_df[, subset(terms_full_df, term %in% select_terms)$pos,
#                                       FALSE]
        } else stop(
        "glb_txt_filter_terms should be one of c('sparse', 'top.val', 'top.cor') vs. '",
                    glb_txt_filter_terms, "'")    
        
        # Add every term associated with a selected term.
        assoc_terms_lst <- findAssocs(glb_full_DTM_lst[[txt_var]], select_terms, c(0.2))
        assoc_terms <- c(NULL)
        for (term in names(assoc_terms_lst))
            if (length(assoc_terms_lst[[term]]) > 0)
                assoc_terms <- union(assoc_terms, names(assoc_terms_lst[[term]]))
        
        # terms_full_df$pos is the column position of each term in the full DTM,
        # and therefore in txt_full_X_df as well.
        txt_X_df <- txt_full_X_df[, 
                        subset(terms_full_df, term %in% c(select_terms, assoc_terms))$pos,
                                    FALSE]
        glb_allobs_df <- cbind(glb_allobs_df, txt_X_df) # TfIdf is normalized
        #glb_allobs_df <- cbind(glb_allobs_df, log_X_df) # if using non-normalized metrics 
    }
    #identical(chk_entity_df, glb_allobs_df)
    #chk_entity_df <- glb_allobs_df

    extract.features_chunk_df <- myadd_chunk(extract.features_chunk_df, 
                            paste0("extract.features_", "bind.DXM"), 
                                         major.inc=TRUE)

#stop(here"); sav_allobs_df <- glb_allobs_df; glb_allobs_df <- sav_allobs_df
    # Regex patterns, one per punctuation feature. Each entry is passed as a 
    #   pattern to mycount_pattern_occ, and its position in this vector becomes 
    #   the two-digit suffix of the <txt_var>.npnct??.log column, so the order 
    #   and length of the vector must stay stable.
    # Regex metacharacters are escaped so that every pattern matches the literal
    #   punctuation mark(s).
    glb_punct_vctr <- c("!", "\"", "#", "\\$", "%", "&", "'", 
                        "\\(|\\)",# "\\(", "\\)", 
                        "\\*", "\\+", ",", "-", "\\.", "/", ":", ";", 
                        "<|>", # "<", 
                        "=", 
                        # ">", 
                        "\\?", "@", "\\[", "\\\\", "\\]", 
                        "\\^", # escaped: a bare "^" is the start-of-string anchor
                        "_", "`", 
                        "\\{", "\\|", "\\}", "~")
    # Build the derived columns for every text variable and bind them to 
    #   glb_allobs_df. Each foreach iteration returns the new columns for one 
    #   text variable; .combine=cbind joins the per-variable results column-wise.
    # Column names are prefixed with the upper-cased first letter of the text 
    #   variable's name.
    txt_X_df <- foreach(txt_var=glb_txt_vars, .combine=cbind) %dopar% {   
    #for (txt_var in glb_txt_vars) {
        print(sprintf("Binding DXM for %s...", txt_var))
        txt_var_pfx <- toupper(substr(txt_var, 1, 1))        

        # Start every iteration from the same id / .rnorm scaffold (it only 
        #   supplies the rows; both columns are removed again at the end), so 
        #   an iteration never depends on what a previous one left behind
        txt_X_df <- glb_allobs_df[, c(glb_id_var, ".rnorm"), FALSE]

        txt_full_DTM_mtrx <- as.matrix(glb_full_DTM_lst[[txt_var]])
        rownames(txt_full_DTM_mtrx) <- rownames(glb_allobs_df) # print unreadable otherwise
        #print(txt_full_DTM_mtrx[txt_full_DTM_mtrx[, "ebola"] != 0, "ebola"])
        
        # Create <txt_var>.T.<term> for glb_important_terms
        for (term in glb_important_terms[[txt_var]])
            txt_X_df[, paste0(txt_var_pfx, ".T.", make.names(term))] <- 
                txt_full_DTM_mtrx[, term]
                
        # Create <txt_var>.nwrds.log & .nwrds.unq.log
        txt_X_df[, paste0(txt_var_pfx, ".nwrds.log")] <- 
            log(1 + mycount_pattern_occ("\\w+", glb_txt_lst[[txt_var]]))
        txt_X_df[, paste0(txt_var_pfx, ".nwrds.unq.log")] <- 
            log(1 + rowSums(txt_full_DTM_mtrx != 0))

        # Create <txt_var>.sum.TfIdf & <txt_var>.ratio.sum.TfIdf.nwrds
        txt_X_df[, paste0(txt_var_pfx, ".sum.TfIdf")] <- 
            rowSums(txt_full_DTM_mtrx) 
        # exp(.nwrds.log) - 1 undoes the log(1 + n) transform to recover the count
        txt_X_df[, paste0(txt_var_pfx, ".ratio.sum.TfIdf.nwrds")] <- 
            txt_X_df[, paste0(txt_var_pfx, ".sum.TfIdf")] / 
            (exp(txt_X_df[, paste0(txt_var_pfx, ".nwrds.log")]) - 1)
        # 0 / 0 yields NaN; record those rows as 0
        txt_X_df[is.nan(txt_X_df[, paste0(txt_var_pfx, ".ratio.sum.TfIdf.nwrds")]),
                 paste0(txt_var_pfx, ".ratio.sum.TfIdf.nwrds")] <- 0

        # Create <txt_var>.nchrs.log, .nuppr.log & .ndgts.log
        txt_X_df[, paste0(txt_var_pfx, ".nchrs.log")] <- 
            log(1 + mycount_pattern_occ(".", glb_allobs_df[, txt_var]))
        txt_X_df[, paste0(txt_var_pfx, ".nuppr.log")] <- 
            log(1 + mycount_pattern_occ("[[:upper:]]", glb_allobs_df[, txt_var]))
        txt_X_df[, paste0(txt_var_pfx, ".ndgts.log")] <- 
            log(1 + mycount_pattern_occ("[[:digit:]]", glb_allobs_df[, txt_var]))

        # Create <txt_var>.npnct?.log
        # would this be faster if it's iterated over each row instead of 
        #   each created column ???
        # seq_along (rather than 1:length) so an empty pattern vector is a no-op
        for (punct_ix in seq_along(glb_punct_vctr)) { 
#             smp0 <- " "
#             smp1 <- "! \" # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~"
#             smp2 <- paste(smp1, smp1, sep=" ")
#             print(sprintf("Testing %s pattern:", glb_punct_vctr[punct_ix])) 
#             results <- mycount_pattern_occ(glb_punct_vctr[punct_ix], c(smp0, smp1, smp2))
#             names(results) <- NULL; print(results)
            txt_X_df[, 
                paste0(txt_var_pfx, ".npnct", sprintf("%02d", punct_ix), ".log")] <-
                log(1 + mycount_pattern_occ(glb_punct_vctr[punct_ix], 
                                            glb_allobs_df[, txt_var]))
        }
#         print(head(glb_allobs_df[glb_allobs_df[, "A.npnct23.log"] > 0, 
#                                     c("UniqueID", "Popular", "Abstract", "A.npnct23.log")]))    
        
        # Create <txt_var>.nstopwrds.log & <txt_var>.ratio.nstopwrds.nwrds
        stop_words_rex_str <- paste0("\\b(", paste0(c(glb_append_stop_words[[txt_var]], 
                                       stopwords("english")), collapse="|"),
                                     ")\\b")
        txt_X_df[, paste0(txt_var_pfx, ".nstopwrds", ".log")] <-
            log(1 + mycount_pattern_occ(stop_words_rex_str, glb_txt_lst[[txt_var]]))
        # Difference of the two log(1 + n) columns, exponentiated:
        #   (1 + nstopwrds) / (1 + nwrds)
        txt_X_df[, paste0(txt_var_pfx, ".ratio.nstopwrds.nwrds")] <-
            exp(txt_X_df[, paste0(txt_var_pfx, ".nstopwrds", ".log")] - 
                txt_X_df[, paste0(txt_var_pfx, ".nwrds", ".log")])

        # Create <txt_var>.P.http
        txt_X_df[, paste0(txt_var_pfx, ".P.http")] <- 
            as.integer(0 + mycount_pattern_occ("http", glb_allobs_df[, txt_var]))    
    
        # Create <txt_var>.P.mini & air
        #   lookarounds keep "minim..." and "fair" / "hair" / "pair" from matching
        txt_X_df[, paste0(txt_var_pfx, ".P.mini")] <- 
            as.integer(0 + mycount_pattern_occ("mini(?!m)", glb_allobs_df[, txt_var],
                                               perl=TRUE))    
        txt_X_df[, paste0(txt_var_pfx, ".P.air")] <- 
            as.integer(0 + mycount_pattern_occ("(?<![fhp])air", glb_allobs_df[, txt_var],
                                               perl=TRUE))    

        # Create <txt_var>.P.<colour> for the plain colour keywords
        for (colour_kwd in c("black", "white", "gold", "spacegray"))
            txt_X_df[, paste0(txt_var_pfx, ".P.", colour_kwd)] <- 
                as.integer(0 + mycount_pattern_occ(colour_kwd, glb_allobs_df[, txt_var],
                                                   perl=TRUE))    
    
        # Return only the derived columns: drop the scaffold columns by exact 
        #   name (a substring match on the id name could also remove derived 
        #   columns whose names merely contain it)
        txt_X_df[, !(names(txt_X_df) %in% c(glb_id_var, ".rnorm")), FALSE]
        #glb_allobs_df <- cbind(glb_allobs_df, txt_X_df)
    }
    glb_allobs_df <- cbind(glb_allobs_df, txt_X_df)
    #myplot_box(glb_allobs_df, "A.sum.TfIdf", glb_rsp_var)
    
#     if (sum(is.na(glb_allobs_df$D.P.http)) > 0)
#         stop("Why is this happening ?")

    # Generate summaries
#     print(summary(glb_allobs_df))
#     print(sapply(names(glb_allobs_df), function(col) sum(is.na(glb_allobs_df[, col]))))
#     print(summary(glb_trnobs_df))
#     print(sapply(names(glb_trnobs_df), function(col) sum(is.na(glb_trnobs_df[, col]))))
#     print(summary(glb_newobs_df))
#     print(sapply(names(glb_newobs_df), function(col) sum(is.na(glb_newobs_df[, col]))))

    # Add the text variable names to the list of variables excluded as features
    glb_exclude_vars_as_features <- 
        union(glb_exclude_vars_as_features, glb_txt_vars)
    # Remove the intermediate data frames from the workspace
    rm(list=c("log_X_df", "txt_X_df"))
}
## Loading required package: stringr
## Loading required package: tm
## Loading required package: NLP
## 
## Attaching package: 'NLP'
## 
## The following object is masked from 'package:ggplot2':
## 
##     annotate
##                                 label step_major step_minor    bgn    end
## 2 extract.features_factorize.str.vars          2          0 17.889 18.173
## 3       extract.features_process.text          3          0 18.174     NA
##   elapsed
## 2   0.284
## 3      NA
## [1] "Building glb_txt_lst..."
## [1] "running gsub for 10 (of 178): #\\bCentral African Republic\\b#..."
## [1] "running gsub for 20 (of 178): #\\bAlejandro G\\. I&ntilde;&aacute;rritu#..."
## [1] "running gsub for 30 (of 178): #\\bC\\.A\\.A\\.#..."
## [1] "running gsub for 40 (of 178): #\\bCV\\.#..."
## [1] "running gsub for 50 (of 178): #\\bE\\.P\\.A\\.#..."
## [1] "running gsub for 60 (of 178): #\\bG\\.I\\. Joe#..."
## [1] "running gsub for 70 (of 178): #\\bISIS\\.#..."
## [1] "running gsub for 80 (of 178): #\\bJ\\.K\\. Simmons#..."
## [1] "running gsub for 90 (of 178): #\\bM\\. Henri Pol#..."
## [1] "running gsub for 100 (of 178): #\\bN\\.Y\\.S\\.E\\.#..."
## [1] "running gsub for 110 (of 178): #\\bR\\.B\\.S\\.#..."
## [1] "running gsub for 120 (of 178): #\\bSteven A\\. Cohen#..."
## [1] "running gsub for 130 (of 178): #\\bV\\.A\\.#..."
## [1] "running gsub for 140 (of 178): #\\bWall Street#..."
## [1] "running gsub for 150 (of 178): #\\bSaint( |-)((Laurent|Lucia)\\b)+#..."
## [1] "running gsub for 160 (of 178): #\\bSouth( |\\\\.)(America|American|Africa|African|Carolina|Dakota|Korea|Korean|Sudan)\\b#..."
## [1] "running gsub for 170 (of 178): #(\\w)-a-year#..."
## [1] "Remaining OK in descr.my:"
##   pattern .n
## 1      OK  6
## [[1]]
## [1] 3
## attr(,"match.length")
## [1] 2
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
##         
## [1,] 0 0
## attr(,"capture.length")
##         
## [1,] 0 0
## attr(,"capture.names")
## [1] "" ""
## 
## [1] "ROKEN: Device has at least one or more problems: \nFor Parts or Repair"
## [[1]]
## [1] 3
## attr(,"match.length")
## [1] 2
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
##         
## [1,] 0 0
## attr(,"capture.length")
##         
## [1,] 0 0
## attr(,"capture.names")
## [1] "" ""
## 
## [1] "ROKEN DEVICE: Problem with Apple ID"
## [[1]]
## [1] 3
## attr(,"match.length")
## [1] 2
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
##         
## [1,] 0 0
## attr(,"capture.length")
##         
## [1,] 0 0
## attr(,"capture.names")
## [1] "" ""
## 
## [1] "ROKEN: Device has at least one or more problems: \nFor Parts or Repair"
## [[1]]
## [1] 3
## attr(,"match.length")
## [1] 2
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
##         
## [1,] 0 0
## attr(,"capture.length")
##         
## [1,] 0 0
## attr(,"capture.names")
## [1] "" ""
## 
## [1] "ROKEN: Device has at least one or more problems: \nFor Parts or Repair"
## [[1]]
## [1] 3
## attr(,"match.length")
## [1] 2
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
##         
## [1,] 0 0
## attr(,"capture.length")
##         
## [1,] 0 0
## attr(,"capture.names")
## [1] "" ""
## 
## [1] "ROKEN: Device has at least one or more problems: \nFor Parts or Repair"
## [[1]]
## [1] 3
## attr(,"match.length")
## [1] 2
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
##         
## [1,] 0 0
## attr(,"capture.length")
##         
## [1,] 0 0
## attr(,"capture.names")
## [1] "" ""
## 
## [1] "ROKEN SCREEN"
## [1] pattern .n     
## <0 rows> (or 0-length row.names)
## [1] pattern .n     
## <0 rows> (or 0-length row.names)
## [1] "Remaining Acronyms in descr.my:"
## [1] pattern .n     
## <0 rows> (or 0-length row.names)
##        pattern .n
## 1  CONDITION.   8
## 2        ONLY.  6
## 3         GB.   4
## 4       BOX.    2
## 5     CORNER.   2
## 6         ESN.  2
## 7       GOOD.   2
## 8     ICLOUD.   2
## 9       IPADS.  2
## 10    LOCKED.   2
## 11     LOCKS.   2
## 12      ONLY.   2
## 13 SCRATCHES.   2
## 14    TEARS.    2
## 15       USE.   2
## [1] "Remaining #\\b(Fort|Ft\\.|Hong|Las|Los|New|Puerto|Saint|San|St\\.)( |-)(\\w)+# terms in descr.my: "
##          pattern .n
## 2       New Open  3
## 4  New Condition  2
## 7  New Digitizer  1
## 8     New Opened  1
## 9    New Scratch  1
## 10    New Screen  1
## [1] "    consider cleaning if relevant to problem domain; geography name; .n > 1"
## [1] "Remaining #\\b(N|S|E|W|C)( |\\.)(\\w)+# terms in descr.my: "
##   pattern .n
## 1 C Stock  3
## 2  W blue  1
## [1] "Remaining #\\b(North|South|East|West|Central)( |\\.)(\\w)+# terms in descr.my: "
##                                                    label step_major
## 3                          extract.features_process.text          3
## 4 extract.features_process.text_reporting_compound_terms          3
##   step_minor    bgn    end elapsed
## 3          0 18.174 19.789   1.615
## 4          1 19.789     NA      NA
## [1] "Remaining compound terms in descr.my: "
##                                                    label step_major
## 4 extract.features_process.text_reporting_compound_terms          3
## 5                          extract.features_build.corpus          4
##   step_minor    bgn    end elapsed
## 4          1 19.789 19.794   0.005
## 5          0 19.794     NA      NA
## [1] "Building glb_corpus_lst..."
## [1] "    Top_n stop TfIDf terms for descr.my:"
## Warning in weighting(x): empty document(s): character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) charact
## Warning in cor(TfIdf_mtrx, as.numeric(glb_allobs_df[, glb_txt_cor_var]), :
## the standard deviation is zero
## [1] "Rows: 899; Cols: 9"
##              TfIdf      term freq pos         cor.y    cor.y.abs  TfIdf.N
## condition 207.6197 condition  498 165 -0.0426588315 0.0426588315 82.44452
## new       124.1683       new  156 523 -0.0372353149 0.0372353149 50.77429
## used      121.8672      used  240 855  0.0146439599 0.0146439599 40.84209
## good      120.2664      good  197 342 -0.0002812515 0.0002812515 44.51689
## scratches 112.5796 scratches  254 687 -0.0061691062 0.0061691062 44.23566
## screen    104.9197    screen  210 689  0.0230340341 0.0230340341 36.36712
##            TfIdf.Y TfIdf.NA
## condition 56.11620 69.05897
## new       30.11628 43.27771
## used      39.75401 41.27105
## good      38.21866 37.53088
## scratches 36.53015 31.81375
## screen    37.40353 31.14910
##               TfIdf      term freq pos       cor.y  cor.y.abs   TfIdf.N
## correctly 5.0298080 correctly    5 186 -0.03730252 0.03730252 3.0178848
## headphone 2.0174744 headphone    2 364 -0.02152502 0.02152502 0.8646319
## guarantee 1.6250832 guarantee    1 348 -0.02152502 0.02152502 1.6250832
## real      1.2639536      real    1 638 -0.02152502 0.02152502 1.2639536
## dont      1.0341439      dont    1 248  0.02500407 0.02500407 0.0000000
## grey      0.7583722      grey    1 347  0.02500407 0.02500407 0.0000000
##             TfIdf.Y TfIdf.NA
## correctly 0.0000000 2.011923
## headphone 0.0000000 1.152843
## guarantee 0.0000000 0.000000
## real      0.0000000 0.000000
## dont      1.0341439 0.000000
## grey      0.7583722 0.000000
##             TfIdf    term freq pos       cor.y  cor.y.abs   TfIdf.N
## red     0.8125416     red    1 648          NA         NA 0.0000000
## version 0.8125416 version    1 862 -0.02152502 0.02152502 0.8125416
## adaptor 0.7583722 adaptor    1  36  0.02500407 0.02500407 0.0000000
## divider 0.7583722 divider    1 243  0.02500407 0.02500407 0.0000000
## grey    0.7583722    grey    1 347  0.02500407 0.02500407 0.0000000
## hdmi    0.7583722    hdmi    1 363  0.02500407 0.02500407 0.0000000
##           TfIdf.Y  TfIdf.NA
## red     0.0000000 0.8125416
## version 0.0000000 0.0000000
## adaptor 0.7583722 0.0000000
## divider 0.7583722 0.0000000
## grey    0.7583722 0.0000000
## hdmi    0.7583722 0.0000000
##             TfIdf    term freq pos       cor.y  cor.y.abs   TfIdf.N
## red     0.8125416     red    1 648          NA         NA 0.0000000
## version 0.8125416 version    1 862 -0.02152502 0.02152502 0.8125416
## adaptor 0.7583722 adaptor    1  36  0.02500407 0.02500407 0.0000000
## divider 0.7583722 divider    1 243  0.02500407 0.02500407 0.0000000
## grey    0.7583722    grey    1 347  0.02500407 0.02500407 0.0000000
## hdmi    0.7583722    hdmi    1 363  0.02500407 0.02500407 0.0000000
##           TfIdf.Y  TfIdf.NA
## red     0.0000000 0.8125416
## version 0.0000000 0.0000000
## adaptor 0.7583722 0.0000000
## divider 0.7583722 0.0000000
## grey    0.7583722 0.0000000
## hdmi    0.7583722 0.0000000
## Warning in weighting(x): empty document(s): character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) charact
## [1] "    Top_n stem TfIDf terms for descr.my:"
## Warning in weighting(x): empty document(s): character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) charact
## Warning in cor(TfIdf_mtrx, as.numeric(glb_allobs_df[, glb_txt_cor_var]), :
## the standard deviation is zero
## [1] "Rows: 747; Cols: 9"
##            TfIdf    term freq pos         cor.y    cor.y.abs  TfIdf.N
## condit  207.7156  condit  499 137 -0.0418798096 0.0418798096 82.38883
## use     144.7700     use  291 709  0.0103720246 0.0103720246 51.46753
## scratch 126.4831 scratch  286 565 -0.0088060862 0.0088060862 49.35848
## new     124.1683     new  156 429 -0.0372353149 0.0372353149 50.77429
## good    120.3335    good  197 281 -0.0004368629 0.0004368629 44.58392
## screen  105.7897  screen  213 566  0.0232373651 0.0232373651 36.89203
##          TfIdf.Y TfIdf.NA
## condit  56.35056 68.97623
## use     47.66515 45.63736
## scratch 40.20165 36.92302
## new     30.11628 43.27771
## good    38.21866 37.53088
## screen  37.92265 30.97500
##              TfIdf     term freq pos        cor.y   cor.y.abs   TfIdf.N
## see      47.840365      see   53 573  0.004143357 0.004143357 17.631219
## small    31.644189    small   46 609 -0.001073854 0.001073854 11.168239
## upgrad    7.864791   upgrad    5 705  0.035370611 0.035370611  0.000000
## lighten   2.240183  lighten    2 362 -0.030076173 0.030076173  2.240183
## logic     1.625083    logic    1 374  0.025004068 0.025004068  0.000000
## discolor  1.421948 discolor    1 190 -0.021525023 0.021525023  1.421948
##            TfIdf.Y  TfIdf.NA
## see      16.501545 13.707601
## small     9.437755 11.038194
## upgrad    3.017885  4.846906
## lighten   0.000000  0.000000
## logic     1.625083  0.000000
## discolor  0.000000  0.000000
##             TfIdf    term freq pos       cor.y  cor.y.abs   TfIdf.N
## red     0.8125416     red    1 532          NA         NA 0.0000000
## version 0.8125416 version    1 716 -0.02152502 0.02152502 0.8125416
## adaptor 0.7583722 adaptor    1  31  0.02500407 0.02500407 0.0000000
## divid   0.7583722   divid    1 194  0.02500407 0.02500407 0.0000000
## grey    0.7583722    grey    1 286  0.02500407 0.02500407 0.0000000
## hdmi    0.7583722    hdmi    1 297  0.02500407 0.02500407 0.0000000
##           TfIdf.Y  TfIdf.NA
## red     0.0000000 0.8125416
## version 0.0000000 0.0000000
## adaptor 0.7583722 0.0000000
## divid   0.7583722 0.0000000
## grey    0.7583722 0.0000000
## hdmi    0.7583722 0.0000000
##             TfIdf    term freq pos       cor.y  cor.y.abs   TfIdf.N
## red     0.8125416     red    1 532          NA         NA 0.0000000
## version 0.8125416 version    1 716 -0.02152502 0.02152502 0.8125416
## adaptor 0.7583722 adaptor    1  31  0.02500407 0.02500407 0.0000000
## divid   0.7583722   divid    1 194  0.02500407 0.02500407 0.0000000
## grey    0.7583722    grey    1 286  0.02500407 0.02500407 0.0000000
## hdmi    0.7583722    hdmi    1 297  0.02500407 0.02500407 0.0000000
##           TfIdf.Y  TfIdf.NA
## red     0.0000000 0.8125416
## version 0.0000000 0.0000000
## adaptor 0.7583722 0.0000000
## divid   0.7583722 0.0000000
## grey    0.7583722 0.0000000
## hdmi    0.7583722 0.0000000
## Warning in weighting(x): empty document(s): character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) charact
##                                   [,1]
## terms.n.post.stop         -0.080072993
## terms.n.post.stop.log     -0.063865173
## TfIdf.sum.post.stop       -0.030336619
## terms.n.post.stem         -0.079867739
## terms.n.post.stem.log     -0.063843117
## TfIdf.sum.post.stem       -0.032374274
## terms.n.stem.stop.Ratio    0.017579091
## TfIdf.sum.stem.stop.Ratio -0.001456838
##                           label step_major step_minor    bgn    end
## 5 extract.features_build.corpus          4          0 19.794 30.382
## 6  extract.features_extract.DTM          5          0 30.382     NA
##   elapsed
## 5  10.588
## 6      NA
## [1] "Extracting TfIDf terms for descr.my..."
## Warning in weighting(x): empty document(s): character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) charact
##                          label step_major step_minor    bgn    end elapsed
## 6 extract.features_extract.DTM          5          0 30.382 31.608   1.226
## 7  extract.features_report.DTM          6          0 31.609     NA      NA
## [1] "Reporting TfIDf terms for descr.my..."
## [1] "   Full TermMatrix:"
## <<DocumentTermMatrix (documents: 2657, terms: 747)>>
## Non-/sparse entries: 8448/1976331
## Sparsity           : 100%
## Maximal term length: 15
## Weighting          : term frequency - inverse document frequency (normalized) (tf-idf)
## Warning in cor(TfIdf_mtrx, as.numeric(glb_allobs_df[, glb_txt_cor_var]), :
## the standard deviation is zero
## [1] "   Sparse TermMatrix:"
## <<DocumentTermMatrix (documents: 2657, terms: 8)>>
## Non-/sparse entries: 2072/19184
## Sparsity           : 90%
## Maximal term length: 7
## Weighting          : term frequency - inverse document frequency (normalized) (tf-idf)
## Warning in myplot_scatter(plt_TfIdf_df, "freq.full", "TfIdf.full",
## colorcol_name = "in.sprs"): converting in.sprs to class:factor

## Warning: Removed 6 rows containing missing values (geom_path).
## Warning: Removed 6 rows containing missing values (geom_path).
## Warning: Removed 6 rows containing missing values (geom_path).

## Warning in rm(full_TfIdf_mtrx, full_TfIdf_df, melt_TfIdf_df,
## terms_TfIdf_df): object 'full_TfIdf_mtrx' not found
##                         label step_major step_minor    bgn    end elapsed
## 7 extract.features_report.DTM          6          0 31.609 33.722   2.113
## 8   extract.features_bind.DTM          7          0 33.722     NA      NA
## Loading required package: tidyr
## [1] "Binding DTM for descr.my..."
## Warning in cor(TfIdf_mtrx, as.numeric(glb_allobs_df[, glb_txt_cor_var]), :
## the standard deviation is zero

## Warning: Removed 83 rows containing missing values (geom_point).
## Warning: Removed 83 rows containing missing values (geom_point).
## Warning: Removed 83 rows containing missing values (geom_point).
## Warning: Removed 83 rows containing missing values (geom_point).
## Warning: Removed 83 rows containing missing values (geom_point).
## Warning: Removed 83 rows containing missing values (geom_point).
## Warning: Removed 83 rows containing missing values (geom_point).
## Warning: Removed 83 rows containing missing values (geom_point).
## Warning: Removed 659 rows containing missing values (geom_text).
## Warning: Removed 659 rows containing missing values (geom_text).
## Warning: Removed 659 rows containing missing values (geom_text).
## Warning: Removed 659 rows containing missing values (geom_text).
##                       label step_major step_minor    bgn    end elapsed
## 8 extract.features_bind.DTM          7          0 33.722 40.551   6.829
## 9 extract.features_bind.DXM          8          0 40.552     NA      NA
## [1] "Binding DXM for descr.my..."
## Warning in rm(log_X_df, txt_X_df): object 'log_X_df' not found

#stop("here"); sav_allobs_df <- glb_allobs_df; glb_allobs_df <- sav_allobs_df

# Use model info provided in description: list every observation whose
# description matches the pattern "a[[:digit:]]".
# NOTE(review): the pattern presumably targets Apple model numbers such as
# "A1432" -- confirm how mydsp_obs interprets `description.contains`.
mydsp_obs(
    list(description.contains = "a[[:digit:]]"),
    cols = glb_dsp_cols,
    all  = TRUE
)
##      UniqueID sold.fctr prdline.my sold .grpid   color condition cellular
## 618     10618         Y  iPad mini    1   <NA>   Black      Used        0
## 940     10940         N     iPad 3    0   <NA>   Black      Used        1
## 2472    12474      <NA>    Unknown   NA   <NA> Unknown      Used  Unknown
##      carrier storage
## 618     None      16
## 940  Verizon      16
## 2472 Unknown Unknown
##                                                                                                    descr.my
## 618  Nice Apple iPad Mini 16GB Wi- Fi 7.9&#034; spacegray MF432LL/ A A1432 Locked It does work just cannot 
## 940     LIKE NEW (MODEL A1430) + BLUETOOTH KEYBOARD (LATEST MODEL A1314), LEATHER CREAM SMART COVER, BLACK 
## 2472     here we have spacegray apple ipad mini a1432 no charger works great has small nicks nothing major
# Manual corrections for UniqueID 12474, read off its description
# ("spacegray apple ipad mini a1432"): one column -> value pair per fix.
.fixes <- c(prdline.my = "iPad mini",
            color      = "Space Gray",
            cellular   = "0",
            carrier    = "None")
for (.col in names(.fixes)) {
    glb_allobs_df[glb_allobs_df$UniqueID == 12474, .col] <- .fixes[[.col]]
}
rm(.fixes, .col)   # drop the loop temporaries again

# List observations whose description matches "m", any four characters, then
# "ll" -- the hits shown are Apple part numbers such as MC991LL.
mydsp_obs(
    list(description.contains = "m(.{4})ll"),
    cols = glb_dsp_cols,
    all  = TRUE
)
##      UniqueID sold.fctr       prdline.my sold .grpid   color
## 617     10617         Y           iPad 2    1   <NA>   White
## 618     10618         Y        iPad mini    1   <NA>   Black
## 992     10992         N           iPad 2    0   <NA>   White
## 1105    11105         N iPad mini Retina    0   <NA>    Gold
## 1359    11360         N           iPad 3    0   <NA> Unknown
## 1360    11361         Y          Unknown    1   <NA> Unknown
## 1365    11366         Y           iPad 1    1   <NA> Unknown
## 2637    12639      <NA>           iPad 2   NA   <NA>   Black
##                     condition cellular carrier storage
## 617                      Used        0    None      64
## 618                      Used        0    None      16
## 992                      Used        0    None      16
## 1105                     Used        0    None      16
## 1359                     Used  Unknown Unknown Unknown
## 1360                     Used  Unknown Unknown Unknown
## 1365                     Used  Unknown Unknown Unknown
## 2637 For parts or not working        0    None      64
##                                                                                                     descr.my
## 617         This a used Apple iPad 2 64GB, Wi- Fi, 9.7in - White (MC991LL/ A) shows signs of wear, has been 
## 618   Nice Apple iPad Mini 16GB Wi- Fi 7.9&#034; spacegray MF432LL/ A A1432 Locked It does work just cannot 
## 992  Up for auction is this APPLE iPad 1st Gen Model MB292LL 16 GB of Memory Storage 9.7&#034; touch screen 
## 1105 Like New Condition Apple iPad Mini 3 MGYE2LL/ A 16GB Wi- Fi Gold Version Tablet/ eReader. Includes USB 
## 1359                  iPad 3 Black 64Gb storage Model Mc707ll/ a iPad is in very nice shape, glass and case 
## 1360   APPLE iPAD AIR 32GB WHITE MD789LL/ B WHITE. This item is Previously Lightly Used, in Good Condition. 
## 1365   Item still in complete working order, minor scratches, normal wear and tear but no damage. screen is 
## 2637  IPAD 2 64GB BLACK MODEL MC916LL/ A WIFI ONLY MODEL.  PICTURE OF IPAD IS ACTUAL UNIT YOU WILL RECEIVE.
# Manual corrections for UniqueID 11360 ("iPad 3 Black 64Gb ... Mc707ll/ a")
.fixes <- c(color = "Black", storage = "64", cellular = "0", carrier = "None")
for (.col in names(.fixes)) {
    glb_allobs_df[glb_allobs_df$UniqueID == 11360, .col] <- .fixes[[.col]]
}

# Manual corrections for UniqueID 11361 ("APPLE iPAD AIR 32GB WHITE MD789LL/ B")
.fixes <- c(prdline.my = "iPad Air", storage = "32", color = "White",
            cellular = "0", carrier = "None")
for (.col in names(.fixes)) {
    glb_allobs_df[glb_allobs_df$UniqueID == 11361, .col] <- .fixes[[.col]]
}
rm(.fixes, .col)   # drop the loop temporaries again

# mydsp_obs(list(description.contains="mini(?!m)"), perl=TRUE, cols="D.P.mini", all=TRUE)
# mydsp_obs(list(D.P.mini=1), cols="D.P.mini", all=TRUE)
# mydsp_obs(list(D.P.mini=1, productline="Unknown"), cols="D.P.mini", all=TRUE)

# mydsp_obs(list(description.contains="(?<![fhp])air"), perl=TRUE, all=TRUE)
# mydsp_obs(list(description.contains="air"), perl=FALSE, cols="D.P.air", all=TRUE)
# mydsp_obs(list(D.P.air=1, productline="Unknown"), cols="D.P.air", all=TRUE)

# Frequency table of curated vs. original product line against the D.P.mini
# flag and the response variable.
print(
    mycreate_sqlxtab_df(
        glb_allobs_df,
        c("prdline.my", "productline", "D.P.mini", glb_rsp_var)
    )
)
##          prdline.my      productline D.P.mini sold.fctr  .n
## 1            iPad 2           iPad 2        0      <NA> 154
## 2            iPad 2           iPad 2        0         Y 147
## 3            iPad 2           iPad 2        0         N 139
## 4         iPad mini        iPad mini        0         N 138
## 5         iPad mini        iPad mini        0         Y 126
## 6            iPad 1           iPad 1        0         Y 125
## 7           Unknown          Unknown        0         N 121
## 8         iPad mini        iPad mini        0      <NA> 108
## 9          iPad Air         iPad Air        0         N 102
## 10           iPad 1           iPad 1        0         N 100
## 11       iPad Air 2       iPad Air 2        0         N 100
## 12           iPad 4           iPad 4        0         N  93
## 13          Unknown          Unknown        0      <NA>  89
## 14           iPad 1           iPad 1        0      <NA>  88
## 15          Unknown          Unknown        0         Y  80
## 16           iPad 3           iPad 3        0         Y  80
## 17         iPad Air         iPad Air        0         Y  78
## 18         iPad Air         iPad Air        0      <NA>  74
## 19           iPad 3           iPad 3        0         N  73
## 20       iPad Air 2       iPad Air 2        0         Y  71
## 21           iPad 4           iPad 4        0      <NA>  68
## 22           iPad 4           iPad 4        0         Y  64
## 23       iPad Air 2       iPad Air 2        0      <NA>  62
## 24      iPad mini 3      iPad mini 3        0         N  61
## 25      iPad mini 2      iPad mini 2        0         N  56
## 26           iPad 3           iPad 3        0      <NA>  55
## 27      iPad mini 2      iPad mini 2        0      <NA>  52
## 28      iPad mini 2      iPad mini 2        0         Y  48
## 29      iPad mini 3      iPad mini 3        0      <NA>  35
## 30      iPad mini 3      iPad mini 3        0         Y  27
## 31        iPad mini        iPad mini        1         N   7
## 32        iPad mini        iPad mini        1         Y   5
## 33      iPad mini 2      iPad mini 2        1      <NA>   4
## 34 iPad mini Retina iPad mini Retina        0         Y   4
## 35        iPad mini        iPad mini        1      <NA>   3
## 36      iPad mini 3      iPad mini 3        1      <NA>   3
## 37 iPad mini Retina iPad mini Retina        0         N   3
## 38          Unknown          Unknown        1      <NA>   2
## 39      iPad mini 2      iPad mini 2        1         N   2
## 40      iPad mini 3      iPad mini 3        1         N   2
## 41          Unknown          Unknown        1         N   1
## 42          Unknown          Unknown        1         Y   1
## 43           iPad 5           iPad 5        0         Y   1
## 44         iPad Air          Unknown        0         Y   1
## 45        iPad mini          Unknown        1      <NA>   1
## 46        iPad mini        iPad mini        2         Y   1
## 47      iPad mini 2      iPad mini 2        1         Y   1
## 48 iPad mini Retina iPad mini Retina        1         N   1
# Review listings with an "Unknown" original product line and a positive
# D.P.mini value before relabelling them.
print(glb_allobs_df[
    with(glb_allobs_df, (productline == "Unknown") & (D.P.mini > 0)),
    c(glb_id_var, glb_category_var, glb_dsp_cols, glb_txt_vars)
])
##      UniqueID prdline.my sold .grpid      color                condition
## 1172    11172    Unknown    0      8    Unknown                     Used
## 1803    11804    Unknown    1   <NA>      White       Seller refurbished
## 2223    12225    Unknown   NA      8    Unknown                     Used
## 2472    12474  iPad mini   NA   <NA> Space Gray                     Used
## 2623    12625    Unknown   NA   <NA>      White For parts or not working
##      cellular carrier storage
## 1172  Unknown Unknown      16
## 1803        1    AT&T Unknown
## 2223  Unknown Unknown      16
## 2472        0    None Unknown
## 2623  Unknown Unknown Unknown
##                                                                                                    descr.my
## 1172     IPAD mini .  not sure of what generation it can be.  selling as is or best offer. had a crack but 
## 1803    30 Day Warranty.  Refurbished iPad Mini with signs of normal wear including possible scratching on 
## 2223     IPAD mini .  not sure of what generation it can be.  selling as is or best offer. had a crack but 
## 2472     here we have spacegray apple ipad mini a1432 no charger works great has small nicks nothing major 
## 2623 Lot of 10 mixed iPad minis. Colors, models &amp; storage capacity vary between each lot. There may be
# Relabel listings whose original product line is "Unknown" but whose
# description matched the "mini" pattern.
# D.P.mini is not a 0/1 flag (a value of 2 occurs in this data), so the filter
# uses `> 0` -- the same condition as the review print just above. With `== 1`
# a listing matching the pattern twice would be displayed but left "Unknown".
glb_allobs_df[(glb_allobs_df$D.P.mini > 0) & (glb_allobs_df$productline == "Unknown"),
              "prdline.my"] <- "iPad mini"

# Frequency table of curated vs. original product line against the D.P.air
# flag and the response variable.
print(
    mycreate_sqlxtab_df(
        glb_allobs_df,
        c("prdline.my", "productline", "D.P.air", glb_rsp_var)
    )
)
##          prdline.my      productline D.P.air sold.fctr  .n
## 1            iPad 2           iPad 2       0      <NA> 154
## 2            iPad 2           iPad 2       0         Y 147
## 3         iPad mini        iPad mini       0         N 145
## 4            iPad 2           iPad 2       0         N 139
## 5         iPad mini        iPad mini       0         Y 132
## 6            iPad 1           iPad 1       0         Y 125
## 7           Unknown          Unknown       0         N 120
## 8         iPad mini        iPad mini       0      <NA> 111
## 9            iPad 1           iPad 1       0         N 100
## 10         iPad Air         iPad Air       0         N  98
## 11       iPad Air 2       iPad Air 2       0         N  97
## 12           iPad 4           iPad 4       0         N  92
## 13          Unknown          Unknown       0      <NA>  88
## 14           iPad 1           iPad 1       0      <NA>  88
## 15          Unknown          Unknown       0         Y  80
## 16           iPad 3           iPad 3       0         Y  79
## 17         iPad Air         iPad Air       0         Y  75
## 18           iPad 3           iPad 3       0         N  73
## 19         iPad Air         iPad Air       0      <NA>  73
## 20       iPad Air 2       iPad Air 2       0         Y  69
## 21           iPad 4           iPad 4       0      <NA>  68
## 22           iPad 4           iPad 4       0         Y  64
## 23      iPad mini 3      iPad mini 3       0         N  63
## 24       iPad Air 2       iPad Air 2       0      <NA>  60
## 25      iPad mini 2      iPad mini 2       0         N  58
## 26           iPad 3           iPad 3       0      <NA>  55
## 27      iPad mini 2      iPad mini 2       0      <NA>  55
## 28      iPad mini 2      iPad mini 2       0         Y  49
## 29      iPad mini 3      iPad mini 3       0      <NA>  38
## 30      iPad mini 3      iPad mini 3       0         Y  27
## 31         iPad Air         iPad Air       1         N   4
## 32 iPad mini Retina iPad mini Retina       0         N   4
## 33 iPad mini Retina iPad mini Retina       0         Y   4
## 34         iPad Air         iPad Air       1         Y   3
## 35        iPad mini          Unknown       0      <NA>   3
## 36       iPad Air 2       iPad Air 2       1      <NA>   2
## 37       iPad Air 2       iPad Air 2       1         N   2
## 38       iPad Air 2       iPad Air 2       1         Y   2
## 39          Unknown          Unknown       1      <NA>   1
## 40          Unknown          Unknown       1         N   1
## 41           iPad 3           iPad 3       1         Y   1
## 42           iPad 4           iPad 4       1         N   1
## 43           iPad 5           iPad 5       0         Y   1
## 44         iPad Air          Unknown       1         Y   1
## 45         iPad Air         iPad Air       1      <NA>   1
## 46       iPad Air 2       iPad Air 2       2         N   1
## 47        iPad mini          Unknown       0         N   1
## 48        iPad mini          Unknown       0         Y   1
## 49      iPad mini 2      iPad mini 2       1      <NA>   1
# Review listings with an "Unknown" original product line and a positive
# D.P.air value before relabelling them.
print(glb_allobs_df[
    with(glb_allobs_df, (productline == "Unknown") & (D.P.air > 0)),
    c(glb_id_var, glb_category_var, glb_dsp_cols, glb_txt_vars)
])
##      UniqueID prdline.my sold .grpid      color condition cellular carrier
## 946     10946    Unknown    0   <NA>    Unknown      Used  Unknown Unknown
## 1360    11361   iPad Air    1   <NA>      White      Used        0    None
## 2433    12435    Unknown   NA   <NA> Space Gray      Used  Unknown Unknown
##      storage
## 946  Unknown
## 1360      32
## 2433     128
##                                                                                                   descr.my
## 946     Gently used apple iPad Air, no scratches on screen and almost no visible wear on back of item. No 
## 1360 APPLE iPAD AIR 32GB WHITE MD789LL/ B WHITE. This item is Previously Lightly Used, in Good Condition. 
## 2433    ***128gb***  black/ spacegray iPad Air excellent used condition(no scratches, dents, or blemishes)
#glb_allobs_df[glb_allobs_df$UniqueID == 11863, "D.P.air"] <- 0
# Relabel listings whose original product line is "Unknown" but whose
# description matched the "air" pattern.
# D.P.air is not a 0/1 flag (a value of 2 occurs in this data), so the filter
# uses `> 0` -- the same condition as the review print just above. With `== 1`
# a listing matching the pattern twice would be displayed but left "Unknown".
glb_allobs_df[(glb_allobs_df$D.P.air > 0) & (glb_allobs_df$productline == "Unknown"),
              "prdline.my"] <- "iPad Air"

# Show the three listings that are corrected by hand next.
print(glb_allobs_df[
    with(glb_allobs_df, UniqueID %in% c(11767, 11811, 12156)),
    c(glb_id_var, "sold", "prdline.my", "color", "condition",
      "cellular", "carrier", "storage", "descr.my")
])
##      UniqueID sold prdline.my   color                condition cellular
## 1766    11767    0    Unknown Unknown For parts or not working  Unknown
## 1810    11811    0    Unknown   Black       Seller refurbished        0
## 2154    12156   NA    Unknown   Black                     Used        0
##      carrier storage
## 1766 Unknown Unknown
## 1810    None Unknown
## 2154    None      32
##                                                                                                 descr.my
## 1766                    Ipad 2 32gb Housing. Some scratches and small dents, but overall good condition.
## 1810 30 Day Warranty.  Refurbished iPad 2 with scratching on screen and wear on back plate.  Comes with 
## 2154  Original IPAD 1st generation - used one owner (myself)Good shape as pictured. Fully functional as
# Hand corrections taken from the descriptions:
#   11767 "Ipad 2 32gb Housing"     -> iPad 2, 32 GB
#   11811 "Refurbished iPad 2"      -> iPad 2
#   12156 "IPAD 1st generation"     -> iPad 1
glb_allobs_df[(glb_allobs_df$UniqueID == 11767) | (glb_allobs_df$UniqueID == 11811),
              "prdline.my"] <- "iPad 2"
glb_allobs_df[glb_allobs_df$UniqueID == 11767, "storage"]    <- "32"
glb_allobs_df[glb_allobs_df$UniqueID == 12156, "prdline.my"] <- "iPad 1"

# mydsp_obs(list(prdline.my="Unknown"), all=TRUE)

# Keep the pre-collapse labels in a one-column side table so the mapping can
# be audited afterwards.
tmp_allobs_df <- glb_allobs_df[, "prdline.my", drop = FALSE]
colnames(tmp_allobs_df) <- "old.prdline.my"

# Collapse fine-grained product lines into coarser families. Labels that are
# not listed here are passed through unchanged by revalue().
glb_allobs_df$prdline.my <- plyr::revalue(
    glb_allobs_df$prdline.my,
    c(
        # "iPad 1" = "iPad",
        # "iPad 2" = "iPad2+",
        "iPad 3" = "iPad 3+", "iPad 4" = "iPad 3+", "iPad 5" = "iPad 3+",
        "iPad Air" = "iPadAir", "iPad Air 2" = "iPadAir",
        "iPad mini" = "iPadmini",
        "iPad mini 2" = "iPadmini 2+", "iPad mini 3" = "iPadmini 2+",
        "iPad mini Retina" = "iPadmini 2+"
    )
)

# Old vs. new label counts
tmp_allobs_df[["prdline.my"]] <- glb_allobs_df[["prdline.my"]]
print(mycreate_sqlxtab_df(tmp_allobs_df, c("prdline.my", "old.prdline.my")))
##     prdline.my   old.prdline.my  .n
## 1       iPad 2           iPad 2 442
## 2     iPadmini        iPad mini 393
## 3       iPad 1           iPad 1 314
## 4      Unknown          Unknown 285
## 5      iPadAir         iPad Air 257
## 6      iPadAir       iPad Air 2 233
## 7      iPad 3+           iPad 4 225
## 8      iPad 3+           iPad 3 208
## 9  iPadmini 2+      iPad mini 2 163
## 10 iPadmini 2+      iPad mini 3 128
## 11 iPadmini 2+ iPad mini Retina   8
## 12     iPad 3+           iPad 5   1
# Counts per collapsed product line
print(
    mycreate_sqlxtab_df(tmp_allobs_df, "prdline.my")
)
##    prdline.my  .n
## 1     iPadAir 490
## 2      iPad 2 442
## 3     iPad 3+ 434
## 4    iPadmini 393
## 5      iPad 1 314
## 6 iPadmini 2+ 299
## 7     Unknown 285
# For listings with an "Unknown" colour, tabulate the colour flags derived
# from the description (D.P.black / gold / spacegray / white).
print(
    mycreate_sqlxtab_df(
        glb_allobs_df[which(glb_allobs_df$color == "Unknown"), ],
        c("color", "D.P.black", "D.P.gold", "D.P.spacegray", "D.P.white")
    )
)
##     color D.P.black D.P.gold D.P.spacegray D.P.white   .n
## 1 Unknown         0        0             0         0 1017
## 2 Unknown         0        0             0         1    4
## 3 Unknown         1        0             0         0    4
## 4 Unknown         0        0             1         0    1
## 5 Unknown         1        0             0         1    1
# Review "Unknown"-colour listings with a positive D.P.black value.
print(glb_allobs_df[
    with(glb_allobs_df, (color == "Unknown") & (D.P.black > 0)),
    c(glb_id_var, "color", "D.P.black", "sold", "prdline.my", "condition",
      "cellular", "carrier", "storage", "descr.my")
])
##      UniqueID   color D.P.black sold prdline.my condition cellular carrier
## 631     10631 Unknown         1    1     iPad 2      Used        1    AT&T
## 683     10683 Unknown         1    0     iPad 2      Used        0    None
## 858     10858 Unknown         1    1    iPad 3+      Used        0    None
## 1243    11244 Unknown         1    0    Unknown      Used  Unknown Unknown
## 2135    12137 Unknown         1   NA     iPad 1      Used        1    AT&T
##      storage
## 631       16
## 683       32
## 858       16
## 1243 Unknown
## 2135      16
##                                                                                                     descr.my
## 631        Very good condition. Minor bumps and bruises. Only scratches on screen are in non- viewing black 
## 683       Comes with folding black case and is engraved in small letters on the back.  Still works perfectly
## 858                                                 screen cracked. name engraving in the back (blacked out)
## 1243       Ipad is in fair condition. Minor scratches on back. Edge around screen is black instead of white.
## 2135 Device is in AVERAGE used cosmetic condition with heavy scratches and wear. Color is black . Device is
# Only 12137 states the device colour outright ("Color is black").
glb_allobs_df[glb_allobs_df[["UniqueID"]] == 12137, "color"] <- "Black"

# Review "Unknown"-colour listings with a positive D.P.spacegray value.
print(glb_allobs_df[
    with(glb_allobs_df, (color == "Unknown") & (D.P.spacegray > 0)),
    c(glb_id_var, "color", "D.P.spacegray", "prdline.my", "condition",
      "cellular", "carrier", "storage", "descr.my")
])
##      UniqueID   color D.P.spacegray prdline.my condition cellular carrier
## 2104    12106 Unknown             1    iPadAir      Used        0    None
##      storage
## 2104      16
##                                                                                                            descr.my
## 2104 This is an iPad Air first generation (spacegray color). It&#039;s a used iPad (just like new) as shown in the
# 12106's description says "(spacegray color)".
glb_allobs_df[glb_allobs_df[["UniqueID"]] %in% 12106, "color"] <- "Space Gray"

# Review "Unknown"-colour listings with a positive D.P.white value.
print(glb_allobs_df[
    with(glb_allobs_df, (color == "Unknown") & (D.P.white > 0)),
    c(glb_id_var, "color", "D.P.white", "prdline.my", "condition",
      "cellular", "carrier", "storage", "descr.my")
])
##      UniqueID   color D.P.white  prdline.my                condition
## 573     10573 Unknown         1 iPadmini 2+                     Used
## 809     10809 Unknown         1     iPad 3+                     Used
## 925     10925 Unknown         1 iPadmini 2+                     Used
## 1243    11244 Unknown         1     Unknown                     Used
## 1734    11735 Unknown         1     iPad 3+ For parts or not working
##      cellular carrier storage
## 573         0    None      16
## 809         0    None      64
## 925         0    None      64
## 1243  Unknown Unknown Unknown
## 1734        1 Verizon      16
##                                                                                                        descr.my
## 573                Like new white iPad mini no scratches always kept in case, sold with keyboard, box and cords
## 809          iPad 3 gen. 64GB, white, wifi- only. Condition = good as new, very minor sign of use. No charger. 
## 925  iPad mini 2/ Retina Display/ Latest Model/ 64GB/ Wi- Fi/ Silver&amp;White . Near Mint Condition excellent 
## 1243          Ipad is in fair condition. Minor scratches on back. Edge around screen is black instead of white.
## 1734             Device is in POOR used cosmetic condition with cracked outer glass. Color is White. Device is
# Listings confirmed as white from the printed descriptions; 11244 is left
# alone (it mentions both black and white).
glb_allobs_df[
    is.element(glb_allobs_df$UniqueID, c(10573, 10809, 10925, 11735)),
    "color"
] <- "White"

# Add a "<name>.fctr" factor copy of each curated categorical column.
for (.var in c("carrier", "cellular", "color", "prdline.my", "storage")) {
    glb_allobs_df[[paste0(.var, ".fctr")]] <- as.factor(glb_allobs_df[[.var]])
}
rm(.var)   # drop the loop temporary again

#stop("here"); sav_allobs_df <- glb_allobs_df; glb_allobs_df <- sav_allobs_df
# glb_allobs_df %>% 
#     unite(prdl.my.descr, c(prdline.my, as.numeric(D.nchrs.log > 0), sep="#"))
# Category = collapsed product line, "#", then 1/0 for D.nchrs.log > 0.
# NOTE(review): the 1/0 suffix presumably means "has a description" -- confirm
# against how D.nchrs.log is derived.
glb_allobs_df[["prdl.my.descr.fctr"]] <- as.factor(
    paste(glb_allobs_df[["prdline.my"]],
          as.numeric(glb_allobs_df[["D.nchrs.log"]] > 0),
          sep = "#")
)
# Category vs. sold; NA in sold is tabulated as its own column when present
print(table(glb_allobs_df[["prdl.my.descr.fctr"]], glb_allobs_df[["sold"]],
            useNA = "ifany"))
##                
##                   0   1 <NA>
##   Unknown#0      72  47   45
##   Unknown#1      46  33   42
##   iPad 1#0       53  69   46
##   iPad 1#1       47  56   43
##   iPad 2#0       57  80   83
##   iPad 2#1       84  67   71
##   iPad 3+#0      58  87   59
##   iPad 3+#1     108  58   64
##   iPadAir#0     125  95   88
##   iPadAir#1      78  55   49
##   iPadmini 2+#0  95  59   64
##   iPadmini 2+#1  30  21   30
##   iPadmini#0     94  79   65
##   iPadmini#1     52  54   49
# Same breakdown in long form, one row per (category, sold) combination
print(
    mycreate_sqlxtab_df(
        glb_allobs_df,
        c("prdl.my.descr.fctr", "sold")
    )
)
##    prdl.my.descr.fctr sold  .n
## 1           iPadAir#0    0 125
## 2           iPad 3+#1    0 108
## 3           iPadAir#0    1  95
## 4       iPadmini 2+#0    0  95
## 5          iPadmini#0    0  94
## 6           iPadAir#0   NA  88
## 7           iPad 3+#0    1  87
## 8            iPad 2#1    0  84
## 9            iPad 2#0   NA  83
## 10           iPad 2#0    1  80
## 11         iPadmini#0    1  79
## 12          iPadAir#1    0  78
## 13          Unknown#0    0  72
## 14           iPad 2#1   NA  71
## 15           iPad 1#0    1  69
## 16           iPad 2#1    1  67
## 17         iPadmini#0   NA  65
## 18          iPad 3+#1   NA  64
## 19      iPadmini 2+#0   NA  64
## 20          iPad 3+#0   NA  59
## 21      iPadmini 2+#0    1  59
## 22          iPad 3+#0    0  58
## 23          iPad 3+#1    1  58
## 24           iPad 2#0    0  57
## 25           iPad 1#1    1  56
## 26          iPadAir#1    1  55
## 27         iPadmini#1    1  54
## 28           iPad 1#0    0  53
## 29         iPadmini#1    0  52
## 30          iPadAir#1   NA  49
## 31         iPadmini#1   NA  49
## 32          Unknown#0    1  47
## 33           iPad 1#1    0  47
## 34          Unknown#1    0  46
## 35           iPad 1#0   NA  46
## 36          Unknown#0   NA  45
## 37           iPad 1#1   NA  43
## 38          Unknown#1   NA  42
## 39          Unknown#1    1  33
## 40      iPadmini 2+#1   NA  30
## 41      iPadmini 2+#1    0  30
## 42      iPadmini 2+#1    1  21
# From here on the combined "product line # description flag" factor is the
# category variable (earlier prints in this section used the previous value).
glb_category_var <- "prdl.my.descr.fctr"

# print(sapply(names(glb_trnobs_df), function(col) sum(is.na(glb_trnobs_df[, col]))))
# print(sapply(names(glb_newobs_df), function(col) sum(is.na(glb_newobs_df[, col]))))

# print(myplot_scatter(glb_trnobs_df, "<col1_name>", "<col2_name>", smooth=TRUE))

# Drop the text-processing intermediates. rm() only warns for names that do
# not exist, so entries that were never created or already removed are harmless.
rm(corpus_lst, full_TfIdf_DTM, full_TfIdf_vctr, 
   glb_full_DTM_lst, glb_sprs_DTM_lst, txt_corpus, txt_vctr)
## Warning in rm(corpus_lst, full_TfIdf_DTM, full_TfIdf_vctr,
## glb_full_DTM_lst, : object 'corpus_lst' not found
## Warning in rm(corpus_lst, full_TfIdf_DTM, full_TfIdf_vctr,
## glb_full_DTM_lst, : object 'full_TfIdf_vctr' not found
# Record the end of feature extraction as a new major step in the chunk log.
extract.features_chunk_df <- myadd_chunk(
    extract.features_chunk_df,
    "extract.features_end",
    major.inc = TRUE
)
##                        label step_major step_minor    bgn    end elapsed
## 9  extract.features_bind.DXM          8          0 40.552 51.802   11.25
## 10      extract.features_end          9          0 51.802     NA      NA
# Report the timing of the feature-extraction sub-steps recorded in
# extract.features_chunk_df.
myplt_chunk(extract.features_chunk_df)
##                                                    label step_major
## 9                              extract.features_bind.DXM          8
## 5                          extract.features_build.corpus          4
## 8                              extract.features_bind.DTM          7
## 7                            extract.features_report.DTM          6
## 3                          extract.features_process.text          3
## 6                           extract.features_extract.DTM          5
## 2                    extract.features_factorize.str.vars          2
## 1                                   extract.features_bgn          1
## 4 extract.features_process.text_reporting_compound_terms          3
##   step_minor    bgn    end elapsed duration
## 9          0 40.552 51.802  11.250   11.250
## 5          0 19.794 30.382  10.588   10.588
## 8          0 33.722 40.551   6.829    6.829
## 7          0 31.609 33.722   2.113    2.113
## 3          0 18.174 19.789   1.615    1.615
## 6          0 30.382 31.608   1.226    1.226
## 2          0 17.889 18.173   0.284    0.284
## 1          0 17.874 17.888   0.014    0.014
## 4          1 19.789 19.794   0.005    0.005
## [1] "Total Elapsed Time: 51.802 secs"

# if (glb_save_envir)
#     save(glb_feats_df, 
#          glb_allobs_df, #glb_trnobs_df, glb_fitobs_df, glb_OOBobs_df, glb_newobs_df,
#          file=paste0(glb_out_pfx, "extract_features_dsk.RData"))
# load(paste0(glb_out_pfx, "extract_features_dsk.RData"))

# Replay the analytics Petri net with the updated list of available objects.
# The `replay.trans` argument is an assignment expression: it appends
# "data.training.all" and "data.new" to glb_analytics_avl_objs and passes the
# updated vector. Because R evaluates arguments lazily, the global is only
# updated when replay.petrisim actually evaluates this argument.
replay.petrisim(pn=glb_analytics_pn, 
    replay.trans=(glb_analytics_avl_objs <- c(glb_analytics_avl_objs, 
        "data.training.all","data.new")), flip_coord=TRUE)
## time trans    "bgn " "fit.data.training.all " "predict.data.new " "end " 
## 0.0000   multiple enabled transitions:  data.training.all data.new model.selected    firing:  data.training.all 
## 1.0000    1   2 1 0 0 
## 1.0000   multiple enabled transitions:  data.training.all data.new model.selected model.final data.training.all.prediction   firing:  data.new 
## 2.0000    2   1 1 1 0

# Start the "cluster.data" step as a new major step in the chunk log.
glb_chunks_df <- myadd_chunk(
    glb_chunks_df,
    "cluster.data",
    major.inc = TRUE
)
##              label step_major step_minor    bgn    end elapsed
## 5 extract.features          3          0 17.868 53.117  35.249
## 6     cluster.data          4          0 53.117     NA      NA

Step 4.0: cluster data

# Start "manage.missing.data" as a minor step under the current major step.
glb_chunks_df <- myadd_chunk(
    glb_chunks_df,
    "manage.missing.data",
    major.inc = FALSE
)
##                 label step_major step_minor    bgn    end elapsed
## 6        cluster.data          4          0 53.117 54.646   1.529
## 7 manage.missing.data          4          1 54.646     NA      NA
# If mice crashes with error: Error in get(as.character(FUN), mode = "function", envir = envir) : object 'State' of mode 'function' was not found
#   consider excluding 'State' as a feature

# print(sapply(names(glb_trnobs_df), function(col) sum(is.na(glb_trnobs_df[, col]))))
# print(sapply(names(glb_newobs_df), function(col) sum(is.na(glb_newobs_df[, col]))))
# glb_trnobs_df <- na.omit(glb_trnobs_df)
# glb_newobs_df <- na.omit(glb_newobs_df)
# df[is.na(df)] <- 0

# Summarise problem values across all observations; the report lists numeric
# columns with missing values and numeric columns containing zeros.
mycheck_problem_data(glb_allobs_df)
## [1] "numeric data missing in : "
##      sold sold.fctr 
##       798       798 
## [1] "numeric data w/ 0s in : "
##                biddable                    sold          startprice.log 
##                    1444                     999                      31 
##           cellular.fctr     D.terms.n.post.stop D.terms.n.post.stop.log 
##                    1600                    1521                    1521 
##   D.TfIdf.sum.post.stop     D.terms.n.post.stem D.terms.n.post.stem.log 
##                    1521                    1521                    1521 
##   D.TfIdf.sum.post.stem              D.T.condit                 D.T.use 
##                    1521                    2158                    2366 
##             D.T.scratch                 D.T.new                D.T.good 
##                    2371                    2501                    2460 
##              D.T.screen               D.T.great                D.T.ipad 
##                    2444                    2532                    2425 
##                D.T.work               D.T.excel                D.T.like 
##                    2459                    2557                    2584 
##                 D.T.box           D.T.function.                D.T.item 
##                    2547                    2541                    2528 
##               D.T.fulli              D.T.cosmet               D.T.minor 
##                    2569                    2540                    2540 
##                D.T.mint               D.T.crack                D.T.wear 
##                    2594                    2580                    2556 
##             D.T.perfect              D.T.includ                D.T.lock 
##                    2602                    2574                    2614 
##                D.T.case              D.T.icloud                 D.T.see 
##                    2575                    2601                    2604 
##               D.T.light               D.T.devic               D.T.pleas 
##                    2576                    2577                    2590 
##                D.T.back              D.T.origin                D.T.dent 
##                    2580                    2599                    2592 
##                D.T.hous                D.T.sign                D.T.open 
##                    2585                    2580                    2613 
##               D.T.clean                D.T.will                D.T.appl 
##                    2615                    2618                    2598 
##             D.T.charger               D.T.damag                D.T.X100 
##                    2619                    2626                    2593 
##                D.T.come               D.T.scuff              D.T.corner 
##                    2602                    2615                    2612 
##               D.T.small              D.T.broken            D.T.descript 
##                    2611                    2637                    2624 
##                D.T.unit           D.T.refurbish                D.T.show 
##                    2617                    2623                    2606 
##               D.T.shape                D.T.read                D.T.test 
##                    2632                    2626                    2620 
##              D.T.pictur                D.T.bare               D.T.brand 
##                    2624                    2637                    2627 
##                D.T.list                 D.T.may                D.T.mark 
##                    2616                    2619                    2629 
##             D.T.blemish              D.T.packag                D.T.mini 
##                    2625                    2631                    2623 
##              D.T.affect              D.T.normal                 D.T.tab 
##                    2629                    2626                    2630 
##                 D.T.top           D.T.accessori                D.T.ding 
##                    2633                    2629                    2632 
##                D.T.near               D.T.digit               D.T.photo 
##                    2623                    2639                    2634 
##                D.T.tear             D.T.display               D.T.minim 
##                    2626                    2634                    2629 
##                D.T.wifi               D.T.order           D.T.protector 
##                    2632                    2636                    2639 
##                D.T.kept               D.T.right            D.T.previous 
##                    2637                    2638                    2634 
##              D.T.button               D.T.alway             D.T.contact 
##                    2638                    2639                    2642 
##                D.T.fair                 D.T.air                 D.T.esn 
##                    2635                    2636                    2641 
##                D.T.full              D.T.averag                D.T.free 
##                    2641                    2642                    2638 
##                D.T.sinc                D.T.imei                D.T.cabl 
##                    2640                    2640                    2639 
##                D.T.seal          D.T.profession              D.T.overal 
##                    2647                    2641                    2643 
##              D.T.retail               D.T.refer                D.T.left 
##                    2648                    2646                    2646 
##               D.T.stock                 D.T.two              D.T.detail 
##                    2643                    2648                    2650 
##                D.T.bodi              D.T.seller               D.T.activ 
##                    2648                    2643                    2648 
##               D.T.phone             D.T.problem          D.T.manufactur 
##                    2647                    2651                    2649 
##                D.T.side             D.T.certifi                D.T.ship 
##                    2648                    2647                    2646 
##                D.T.chip                 D.T.edg             D.T.inspect 
##                    2651                    2647                    2648 
##             D.T.heavili            D.T.keyboard                 D.T.non 
##                    2646                    2651                    2649 
##                D.T.geek               D.T.squad             D.T.handset 
##                    2652                    2652                    2650 
##               D.T.upper             D.T.sticker              D.T.scroll 
##                    2651                    2649                    2652 
##                D.T.must             D.T.contain                D.T.imag 
##                    2649                    2652                    2654 
##             D.T.qualiti               D.T.anoth                 D.T.pic 
##                    2651                    2652                    2653 
##               D.T.least             D.T.correct              D.T.featur 
##                    2653                    2652                    2652 
##          D.T.technician               D.T.super              D.T.expect 
##                    2652                    2655                    2655 
##                D.T.sync             D.T.speaker                D.T.name 
##                    2652                    2654                    2654 
##              D.T.lightn               D.T.X2016             D.T.passcod 
##                    2652                    2653                    2654 
##               D.T.money                 D.T.els              D.T.stylus 
##                    2655                    2654                    2655 
##              D.T.corpor               D.T.intro              D.T.higher 
##                    2655                    2656                    2656 
##               D.T.beetl              D.T.defens            D.T.disclaim 
##                    2656                    2656                    2656 
##             D.T.essenti               D.T.final             D.T.repeat. 
##                    2656                    2656                    2656 
##             D.nwrds.log         D.nwrds.unq.log             D.sum.TfIdf 
##                    1520                    1521                    1521 
## D.ratio.sum.TfIdf.nwrds             D.nchrs.log             D.nuppr.log 
##                    1521                    1520                    1522 
##             D.ndgts.log           D.npnct01.log           D.npnct02.log 
##                    2427                    2579                    2657 
##           D.npnct03.log           D.npnct04.log           D.npnct05.log 
##                    2614                    2657                    2592 
##           D.npnct06.log           D.npnct07.log           D.npnct08.log 
##                    2554                    2656                    2581 
##           D.npnct09.log           D.npnct10.log           D.npnct11.log 
##                    2641                    2648                    2301 
##           D.npnct12.log           D.npnct13.log           D.npnct14.log 
##                    2538                    1932                    2582 
##           D.npnct15.log           D.npnct16.log           D.npnct17.log 
##                    2637                    2546                    2657 
##           D.npnct18.log           D.npnct19.log           D.npnct20.log 
##                    2656                    2657                    2657 
##           D.npnct21.log           D.npnct22.log           D.npnct23.log 
##                    2657                    2657                    2657 
##           D.npnct24.log           D.npnct25.log           D.npnct26.log 
##                    1520                    2657                    2657 
##           D.npnct27.log           D.npnct28.log           D.npnct29.log 
##                    2657                    2649                    2657 
##           D.npnct30.log         D.nstopwrds.log                D.P.http 
##                    2657                    1664                    2657 
##                D.P.mini                 D.P.air               D.P.black 
##                    2623                    2636                    2640 
##               D.P.white                D.P.gold           D.P.spacegray 
##                    2647                    2655                    2650 
## [1] "numeric data w/ Infs in : "
## named integer(0)
## [1] "numeric data w/ NaNs in : "
## named integer(0)
## [1] "string data missing in : "
## description   condition    cellular     carrier       color     storage 
##        1520           0           0           0           0           0 
## productline      .grpid  prdline.my    descr.my 
##           0          NA           0        1520
# glb_allobs_df <- na.omit(glb_allobs_df)

# Not refactored into mydsutils.R since glb_*_df might be reassigned
#
# Imputes missing values in the feature columns of glb_allobs_df using mice.
#
# Reads the globals glb_allobs_df, glb_exclude_vars_as_features, glb_rsp_var and
# glb_mice_complete.seed. The response variable and every excluded variable are
# removed before imputation, so only candidate features are imputed.
#
# Returns:
#   A data.frame holding only the columns whose contents changed during
#   imputation. It is always a data.frame (zero columns when nothing changed,
#   one column when a single feature was imputed), so callers can rely on
#   ncol() and names().
glb_impute_missing_data <- function() {
    
    require(mice)
    set.seed(glb_mice_complete.seed)
    # drop = FALSE keeps a data.frame even if a single feature column remains
    inp_impent_df <- glb_allobs_df[, setdiff(names(glb_allobs_df), 
                                union(glb_exclude_vars_as_features, glb_rsp_var)),
                                   drop = FALSE]
    print("Summary before imputation: ")
    print(summary(inp_impent_df))
    out_impent_df <- complete(mice(inp_impent_df))
    print("Summary after imputation: ")
    print(summary(out_impent_df))
    
    # A column counts as imputed when it is no longer identical to its input.
    # complete(mice()) changes attributes of factors even though values don't change,
    # so factors are additionally compared on their underlying integer codes.
    is_imputed <- function(col) {
        out_col <- out_impent_df[, col]
        inp_col <- inp_impent_df[, col]
        if (identical(out_col, inp_col))
            return(FALSE)
        !(inherits(out_col, "factor") &&
              identical(as.numeric(out_col), as.numeric(inp_col)))
    }
    ret_vars <- Filter(is_imputed, names(out_impent_df))
    
    # drop = FALSE: without it a single imputed column is returned as a bare
    # vector, and the caller's ncol() / names() checks break.
    return(out_impent_df[, ret_vars, drop = FALSE])
}

# Imputation runs only when it is switched on AND some numeric column actually
# has missing values; the (expensive) imputation call stays last so that it is
# short-circuited away otherwise.
impute_precondition <- glb_impute_na_data &&
    (length(myfind_numerics_missing(glb_allobs_df)) > 0)

if (impute_precondition && (ncol(nonna_df <- glb_impute_missing_data()) > 0)) {
    # Each imputed feature is stored as <name>.nonNA and the original column is
    # retired from the feature set.
    for (col in names(nonna_df)) {
        glb_exclude_vars_as_features <- c(glb_exclude_vars_as_features, col)
        glb_allobs_df[, paste0(col, ".nonNA")] <- nonna_df[, col]
    }
}

# Re-run the data sanity report after (possible) imputation
mycheck_problem_data(glb_allobs_df, terminate = TRUE)
## [1] "numeric data missing in : "
##      sold sold.fctr 
##       798       798 
## [1] "numeric data w/ 0s in : "
##                biddable                    sold          startprice.log 
##                    1444                     999                      31 
##           cellular.fctr     D.terms.n.post.stop D.terms.n.post.stop.log 
##                    1600                    1521                    1521 
##   D.TfIdf.sum.post.stop     D.terms.n.post.stem D.terms.n.post.stem.log 
##                    1521                    1521                    1521 
##   D.TfIdf.sum.post.stem              D.T.condit                 D.T.use 
##                    1521                    2158                    2366 
##             D.T.scratch                 D.T.new                D.T.good 
##                    2371                    2501                    2460 
##              D.T.screen               D.T.great                D.T.ipad 
##                    2444                    2532                    2425 
##                D.T.work               D.T.excel                D.T.like 
##                    2459                    2557                    2584 
##                 D.T.box           D.T.function.                D.T.item 
##                    2547                    2541                    2528 
##               D.T.fulli              D.T.cosmet               D.T.minor 
##                    2569                    2540                    2540 
##                D.T.mint               D.T.crack                D.T.wear 
##                    2594                    2580                    2556 
##             D.T.perfect              D.T.includ                D.T.lock 
##                    2602                    2574                    2614 
##                D.T.case              D.T.icloud                 D.T.see 
##                    2575                    2601                    2604 
##               D.T.light               D.T.devic               D.T.pleas 
##                    2576                    2577                    2590 
##                D.T.back              D.T.origin                D.T.dent 
##                    2580                    2599                    2592 
##                D.T.hous                D.T.sign                D.T.open 
##                    2585                    2580                    2613 
##               D.T.clean                D.T.will                D.T.appl 
##                    2615                    2618                    2598 
##             D.T.charger               D.T.damag                D.T.X100 
##                    2619                    2626                    2593 
##                D.T.come               D.T.scuff              D.T.corner 
##                    2602                    2615                    2612 
##               D.T.small              D.T.broken            D.T.descript 
##                    2611                    2637                    2624 
##                D.T.unit           D.T.refurbish                D.T.show 
##                    2617                    2623                    2606 
##               D.T.shape                D.T.read                D.T.test 
##                    2632                    2626                    2620 
##              D.T.pictur                D.T.bare               D.T.brand 
##                    2624                    2637                    2627 
##                D.T.list                 D.T.may                D.T.mark 
##                    2616                    2619                    2629 
##             D.T.blemish              D.T.packag                D.T.mini 
##                    2625                    2631                    2623 
##              D.T.affect              D.T.normal                 D.T.tab 
##                    2629                    2626                    2630 
##                 D.T.top           D.T.accessori                D.T.ding 
##                    2633                    2629                    2632 
##                D.T.near               D.T.digit               D.T.photo 
##                    2623                    2639                    2634 
##                D.T.tear             D.T.display               D.T.minim 
##                    2626                    2634                    2629 
##                D.T.wifi               D.T.order           D.T.protector 
##                    2632                    2636                    2639 
##                D.T.kept               D.T.right            D.T.previous 
##                    2637                    2638                    2634 
##              D.T.button               D.T.alway             D.T.contact 
##                    2638                    2639                    2642 
##                D.T.fair                 D.T.air                 D.T.esn 
##                    2635                    2636                    2641 
##                D.T.full              D.T.averag                D.T.free 
##                    2641                    2642                    2638 
##                D.T.sinc                D.T.imei                D.T.cabl 
##                    2640                    2640                    2639 
##                D.T.seal          D.T.profession              D.T.overal 
##                    2647                    2641                    2643 
##              D.T.retail               D.T.refer                D.T.left 
##                    2648                    2646                    2646 
##               D.T.stock                 D.T.two              D.T.detail 
##                    2643                    2648                    2650 
##                D.T.bodi              D.T.seller               D.T.activ 
##                    2648                    2643                    2648 
##               D.T.phone             D.T.problem          D.T.manufactur 
##                    2647                    2651                    2649 
##                D.T.side             D.T.certifi                D.T.ship 
##                    2648                    2647                    2646 
##                D.T.chip                 D.T.edg             D.T.inspect 
##                    2651                    2647                    2648 
##             D.T.heavili            D.T.keyboard                 D.T.non 
##                    2646                    2651                    2649 
##                D.T.geek               D.T.squad             D.T.handset 
##                    2652                    2652                    2650 
##               D.T.upper             D.T.sticker              D.T.scroll 
##                    2651                    2649                    2652 
##                D.T.must             D.T.contain                D.T.imag 
##                    2649                    2652                    2654 
##             D.T.qualiti               D.T.anoth                 D.T.pic 
##                    2651                    2652                    2653 
##               D.T.least             D.T.correct              D.T.featur 
##                    2653                    2652                    2652 
##          D.T.technician               D.T.super              D.T.expect 
##                    2652                    2655                    2655 
##                D.T.sync             D.T.speaker                D.T.name 
##                    2652                    2654                    2654 
##              D.T.lightn               D.T.X2016             D.T.passcod 
##                    2652                    2653                    2654 
##               D.T.money                 D.T.els              D.T.stylus 
##                    2655                    2654                    2655 
##              D.T.corpor               D.T.intro              D.T.higher 
##                    2655                    2656                    2656 
##               D.T.beetl              D.T.defens            D.T.disclaim 
##                    2656                    2656                    2656 
##             D.T.essenti               D.T.final             D.T.repeat. 
##                    2656                    2656                    2656 
##             D.nwrds.log         D.nwrds.unq.log             D.sum.TfIdf 
##                    1520                    1521                    1521 
## D.ratio.sum.TfIdf.nwrds             D.nchrs.log             D.nuppr.log 
##                    1521                    1520                    1522 
##             D.ndgts.log           D.npnct01.log           D.npnct02.log 
##                    2427                    2579                    2657 
##           D.npnct03.log           D.npnct04.log           D.npnct05.log 
##                    2614                    2657                    2592 
##           D.npnct06.log           D.npnct07.log           D.npnct08.log 
##                    2554                    2656                    2581 
##           D.npnct09.log           D.npnct10.log           D.npnct11.log 
##                    2641                    2648                    2301 
##           D.npnct12.log           D.npnct13.log           D.npnct14.log 
##                    2538                    1932                    2582 
##           D.npnct15.log           D.npnct16.log           D.npnct17.log 
##                    2637                    2546                    2657 
##           D.npnct18.log           D.npnct19.log           D.npnct20.log 
##                    2656                    2657                    2657 
##           D.npnct21.log           D.npnct22.log           D.npnct23.log 
##                    2657                    2657                    2657 
##           D.npnct24.log           D.npnct25.log           D.npnct26.log 
##                    1520                    2657                    2657 
##           D.npnct27.log           D.npnct28.log           D.npnct29.log 
##                    2657                    2649                    2657 
##           D.npnct30.log         D.nstopwrds.log                D.P.http 
##                    2657                    1664                    2657 
##                D.P.mini                 D.P.air               D.P.black 
##                    2623                    2636                    2640 
##               D.P.white                D.P.gold           D.P.spacegray 
##                    2647                    2655                    2650 
## [1] "numeric data w/ Infs in : "
## named integer(0)
## [1] "numeric data w/ NaNs in : "
## named integer(0)
## [1] "string data missing in : "
## description   condition    cellular     carrier       color     storage 
##        1520           0           0           0           0           0 
## productline      .grpid  prdline.my    descr.my 
##           0          NA           0        1520

Step 4.1: manage missing data

# Clusters observations within each category on their text-derived features and
# stores the assignment in glb_allobs_df$.clusterid / $.clusterid.fctr.
#
# Steps:
#   1. Select the clustering features: columns named <X>.P.* or <X>.T.* where
#      <X> is the upper-cased first letter of a variable in glb_txt_vars.
#   2. Report the entropy of glb_cluster_entropy_var overall and per category.
#   3. Per category: cosine distances -> Ward hierarchical clustering ->
#      dynamic tree cut; unassigned observations are folded into cluster 1.
#   4. Report the entropy per (category, cluster) and register .clusterid.fctr
#      as an interaction-only feature of the category variable; the raw
#      .clusterid and the clustering features are excluded as features.
#
# Reads / modifies the globals glb_allobs_df, glb_exclude_vars_as_features and
# glb_interaction_only_features.
if (glb_cluster) {
    require(proxy)
    #require(hash)
    require(dynamicTreeCut)
    require(entropy)
    require(tidyr)

#     glb_hash <- hash(key=unique(glb_allobs_df$myCategory), 
#                      values=1:length(unique(glb_allobs_df$myCategory)))
#     glb_hash_lst <- hash(key=unique(glb_allobs_df$myCategory), 
#                      values=1:length(unique(glb_allobs_df$myCategory)))
#stop(here"); sav_allobs_df <- glb_allobs_df; glb_allobs_df <- sav_allobs_df
    print("Clustering features: ")
    print(cluster_vars <- grep(paste0("[", 
                                toupper(paste0(substr(glb_txt_vars, 1, 1), collapse="")),
                                      "]\\.[PT]\\."), 
                               names(glb_allobs_df), value=TRUE))
    print(sprintf("glb_allobs_df Entropy: %0.4f", 
        allobs_ent <- entropy(table(glb_allobs_df[, glb_cluster_entropy_var]),
                              method="ML")))
    
    # Category x class frequency table, reshaped to one row per category
    category_df <- as.data.frame(table(glb_allobs_df[, glb_category_var], 
                                       glb_allobs_df[, glb_cluster_entropy_var]))
    names(category_df)[c(1, 2)] <- c(glb_category_var, glb_cluster_entropy_var)
    category_df <- do.call(tidyr::spread, 
                           list(category_df, glb_cluster_entropy_var, "Freq"))
    # Column 1 is the category label; the remaining columns are class counts
    tmp.entropy <- sapply(1:nrow(category_df),
                    function(row) entropy(as.numeric(category_df[row, -1]), method="ML"))
    tmp.knt <- sapply(1:nrow(category_df),
                    function(row) sum(as.numeric(category_df[row, -1])))
    category_df$.entropy <- tmp.entropy; category_df$.knt <- tmp.knt
    print(sprintf("glb_allobs_df$%s Entropy: %0.4f (%0.4f pct)", glb_category_var,
        category_ent <- weighted.mean(category_df$.entropy, category_df$.knt),
        100 * category_ent / allobs_ent))
    print(category_df)

    # Default assignment; also the final value for categories that cannot be clustered
    glb_allobs_df$.clusterid <- 1    
    #print(max(table(glb_allobs_df$myCategory.fctr) / 20))
    
    for (grp in sort(unique(glb_allobs_df[, glb_category_var]))) {
        print(sprintf("Category: %s", grp))
        ctgry_allobs_df <- glb_allobs_df[glb_allobs_df[, glb_category_var] == grp, ]
        if (!inherits(ctgry_allobs_df[, glb_cluster_entropy_var], "factor"))
            ctgry_allobs_df[, glb_cluster_entropy_var] <- 
                as.factor(ctgry_allobs_df[, glb_cluster_entropy_var])
        
        # hclust() requires at least two observations; such a category keeps the
        # default .clusterid of 1 instead of aborting the whole run.
        if (nrow(ctgry_allobs_df) < 2) {
            print(sprintf("Category: %s has fewer than 2 observations; skipping clustering",
                          grp))
            next
        }
        
        # dist() is proxy::dist here (proxy masks stats::dist), which supports "cosine"
        dstns_dist <- dist(ctgry_allobs_df[, cluster_vars], method = "cosine")
        dstns_mtrx <- as.matrix(dstns_dist)
        print(sprintf("max distance(%0.4f) pair:", max(dstns_mtrx)))
        # which.max() returns a column-major linear index, so this is really the
        # column of the maximum; the distance matrix is symmetric, so treating it
        # as a row and scanning that row yields the same pair.
        row_ix <- ceiling(which.max(dstns_mtrx) / ncol(dstns_mtrx))
        col_ix <- which.max(dstns_mtrx[row_ix, ])
        print(ctgry_allobs_df[c(row_ix, col_ix), 
            c(glb_id_var, glb_cluster_entropy_var, glb_category_var, glb_txt_vars, cluster_vars)])
    
        # Overwrite the zero self-distances so they cannot win the minimum search
        min_dstns_mtrx <- dstns_mtrx
        diag(min_dstns_mtrx) <- 1
        # Float representations issue -2.22e-16 vs. 0.0000
        print(sprintf("min distance(%0.4f) pair:", min(min_dstns_mtrx)))
        row_ix <- ceiling(which.min(min_dstns_mtrx) / ncol(min_dstns_mtrx))
        col_ix <- which.min(min_dstns_mtrx[row_ix, ])
        print(ctgry_allobs_df[c(row_ix, col_ix), 
            c(glb_id_var, glb_cluster_entropy_var, glb_category_var, glb_txt_vars,
              cluster_vars)])
    
        set.seed(glb_cluster.seed)
        clusters <- hclust(dstns_dist, method = "ward.D2")
        #plot(clusters, labels=NULL, hang=-1)
        # myplclust is defined elsewhere; presumably plots the dendrogram with
        # leaves coloured by class - TODO confirm
        myplclust(clusters, lab.col=unclass(ctgry_allobs_df[, glb_cluster_entropy_var]))
        
        #clusterGroups = cutree(clusters, k=7)
        clusterGroups <- cutreeDynamic(clusters, minClusterSize=10, method="tree",
                                       deepSplit=0)
        # Unassigned groups are labeled 0; the largest group has label 1
        # print() is required: values are not auto-printed inside a for loop
        print(table(clusterGroups, ctgry_allobs_df[, glb_cluster_entropy_var],
                    useNA="ifany"))
        #print(ctgry_allobs_df[which(clusterGroups == 1), c("UniqueID", "Popular", "Headline")])
        #print(ctgry_allobs_df[(clusterGroups == 1) & !is.na(ctgry_allobs_df$Popular) & (ctgry_allobs_df$Popular==1), c("UniqueID", "Popular", "Headline")])
        # Fold the unassigned observations into cluster 1
        clusterGroups[clusterGroups == 0] <- 1
        print(table(clusterGroups, ctgry_allobs_df[, glb_cluster_entropy_var],
                    useNA="ifany"))
        #summary(factor(clusterGroups))
#         clusterGroups <- clusterGroups + 
#                 100 * # has to be > max(table(glb_allobs_df[, glb_category_var].fctr) / minClusterSize=20)
#                             which(levels(glb_allobs_df[, glb_category_var].fctr) == grp)
#         table(clusterGroups, ctgry_allobs_df[, glb_cluster_entropy_var], useNA="ifany")        
    
        # add to glb_allobs_df - then split the data again
        glb_allobs_df[glb_allobs_df[, glb_category_var]==grp,]$.clusterid <- clusterGroups
        #print(unique(glb_allobs_df$.clusterid))
        #print(glb_feats_df[glb_feats_df$id == ".clusterid.fctr", ])
    }
    
    # Category x cluster x class frequencies, keeping only combinations that occur
    cluster_df <- as.data.frame(table(glb_allobs_df[, glb_category_var], 
                                      glb_allobs_df[, ".clusterid"], 
                                      glb_allobs_df[, glb_cluster_entropy_var]))
    cluster_df <- subset(cluster_df, Freq > 0)
    names(cluster_df)[c(1, 2, 3)] <- c(glb_category_var, ".clusterid",
                                       glb_cluster_entropy_var)
#     spread(unite(cluster_df, prdline.my.clusterid, prdline.my, .clusterid),
#            sold.fctr, Freq)
    # Merge category and cluster id into one key column, then one row per key
    cluster_df <- do.call(tidyr::unite,
                          list(cluster_df, paste0(glb_category_var, ".clusterid"),
                               grep(glb_category_var, names(cluster_df)),
                               grep(".clusterid", names(cluster_df))))
    cluster_df <- do.call(tidyr::spread, 
                          list(cluster_df, glb_cluster_entropy_var, "Freq"))
    # Combinations dropped by the Freq > 0 filter come back as NA after spread
    cluster_df[is.na(cluster_df)] <- 0
    tmp.entropy <- sapply(1:nrow(cluster_df),
                    function(row) entropy(as.numeric(cluster_df[row, -1]), method="ML"))
    tmp.knt <- sapply(1:nrow(cluster_df),
                    function(row) sum(as.numeric(cluster_df[row, -1])))
    cluster_df$.entropy <- tmp.entropy; cluster_df$.knt <- tmp.knt
    print(sprintf("glb_allobs_df$%s$.clusterid Entropy: %0.4f (%0.4f pct)",
                  glb_category_var,
        cluster_ent <- weighted.mean(cluster_df$.entropy, cluster_df$.knt),
        100 * cluster_ent / category_ent))
    print(cluster_df)

    glb_allobs_df$.clusterid.fctr <- as.factor(glb_allobs_df$.clusterid)
    glb_exclude_vars_as_features <- c(glb_exclude_vars_as_features, 
                                      ".clusterid")
    # Key the interaction on the factor version of the category variable
    glb_interaction_only_features[ifelse(grepl("\\.fctr", glb_category_var),
                                         glb_category_var, 
                                         paste0(glb_category_var, ".fctr"))] <-
        c(".clusterid.fctr")
    glb_exclude_vars_as_features <- c(glb_exclude_vars_as_features, 
                                      cluster_vars)
}
## Loading required package: proxy
## 
## Attaching package: 'proxy'
## 
## The following objects are masked from 'package:stats':
## 
##     as.dist, dist
## 
## The following object is masked from 'package:base':
## 
##     as.matrix
## 
## Loading required package: dynamicTreeCut
## Loading required package: entropy
## [1] "Clustering features: "
##   [1] "D.T.condit"     "D.T.use"        "D.T.scratch"    "D.T.new"       
##   [5] "D.T.good"       "D.T.screen"     "D.T.great"      "D.T.ipad"      
##   [9] "D.T.work"       "D.T.excel"      "D.T.like"       "D.T.box"       
##  [13] "D.T.function."  "D.T.item"       "D.T.fulli"      "D.T.cosmet"    
##  [17] "D.T.minor"      "D.T.mint"       "D.T.crack"      "D.T.wear"      
##  [21] "D.T.perfect"    "D.T.includ"     "D.T.lock"       "D.T.case"      
##  [25] "D.T.icloud"     "D.T.see"        "D.T.light"      "D.T.devic"     
##  [29] "D.T.pleas"      "D.T.back"       "D.T.origin"     "D.T.dent"      
##  [33] "D.T.hous"       "D.T.sign"       "D.T.open"       "D.T.clean"     
##  [37] "D.T.will"       "D.T.appl"       "D.T.charger"    "D.T.damag"     
##  [41] "D.T.X100"       "D.T.come"       "D.T.scuff"      "D.T.corner"    
##  [45] "D.T.small"      "D.T.broken"     "D.T.descript"   "D.T.unit"      
##  [49] "D.T.refurbish"  "D.T.show"       "D.T.shape"      "D.T.read"      
##  [53] "D.T.test"       "D.T.pictur"     "D.T.bare"       "D.T.brand"     
##  [57] "D.T.list"       "D.T.may"        "D.T.mark"       "D.T.blemish"   
##  [61] "D.T.packag"     "D.T.mini"       "D.T.affect"     "D.T.normal"    
##  [65] "D.T.tab"        "D.T.top"        "D.T.accessori"  "D.T.ding"      
##  [69] "D.T.near"       "D.T.digit"      "D.T.photo"      "D.T.tear"      
##  [73] "D.T.display"    "D.T.minim"      "D.T.wifi"       "D.T.order"     
##  [77] "D.T.protector"  "D.T.kept"       "D.T.right"      "D.T.previous"  
##  [81] "D.T.button"     "D.T.alway"      "D.T.contact"    "D.T.fair"      
##  [85] "D.T.air"        "D.T.esn"        "D.T.full"       "D.T.averag"    
##  [89] "D.T.free"       "D.T.sinc"       "D.T.imei"       "D.T.cabl"      
##  [93] "D.T.seal"       "D.T.profession" "D.T.overal"     "D.T.retail"    
##  [97] "D.T.refer"      "D.T.left"       "D.T.stock"      "D.T.two"       
## [101] "D.T.detail"     "D.T.bodi"       "D.T.seller"     "D.T.activ"     
## [105] "D.T.phone"      "D.T.problem"    "D.T.manufactur" "D.T.side"      
## [109] "D.T.certifi"    "D.T.ship"       "D.T.chip"       "D.T.edg"       
## [113] "D.T.inspect"    "D.T.heavili"    "D.T.keyboard"   "D.T.non"       
## [117] "D.T.geek"       "D.T.squad"      "D.T.handset"    "D.T.upper"     
## [121] "D.T.sticker"    "D.T.scroll"     "D.T.must"       "D.T.contain"   
## [125] "D.T.imag"       "D.T.qualiti"    "D.T.anoth"      "D.T.pic"       
## [129] "D.T.least"      "D.T.correct"    "D.T.featur"     "D.T.technician"
## [133] "D.T.super"      "D.T.expect"     "D.T.sync"       "D.T.speaker"   
## [137] "D.T.name"       "D.T.lightn"     "D.T.X2016"      "D.T.passcod"   
## [141] "D.T.money"      "D.T.els"        "D.T.stylus"     "D.T.corpor"    
## [145] "D.T.intro"      "D.T.higher"     "D.T.beetl"      "D.T.defens"    
## [149] "D.T.disclaim"   "D.T.essenti"    "D.T.final"      "D.T.repeat."   
## [153] "D.P.http"       "D.P.mini"       "D.P.air"        "D.P.black"     
## [157] "D.P.white"      "D.P.gold"       "D.P.spacegray" 
## [1] "glb_allobs_df Entropy: 0.6903"
## [1] "glb_allobs_df$prdl.my.descr.fctr Entropy: 0.6779 (98.1925 pct)"
##    prdl.my.descr.fctr   N  Y  .entropy .knt
## 1           Unknown#0  72 47 0.6709143  119
## 2           Unknown#1  46 33 0.6795459   79
## 3            iPad 1#0  53 69 0.6845225  122
## 4            iPad 1#1  47 56 0.6893248  103
## 5            iPad 2#0  57 80 0.6789878  137
## 6            iPad 2#1  84 67 0.6867963  151
## 7           iPad 3+#0  58 87 0.6730117  145
## 8           iPad 3+#1 108 58 0.6470729  166
## 9           iPadAir#0 125 95 0.6838206  220
## 10          iPadAir#1  78 55 0.6781190  133
## 11      iPadmini 2+#0  95 59 0.6655694  154
## 12      iPadmini 2+#1  30 21 0.6774944   51
## 13         iPadmini#0  94 79 0.6893836  173
## 14         iPadmini#1  52 54 0.6929692  106
## [1] "Category: Unknown#0"
## [1] "max distance(0.0000) pair:"
##      UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 24      10024         N          Unknown#0                   0       0
## 24.1    10024         N          Unknown#0                   0       0
##      D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 24             0       0        0          0         0        0        0
## 24.1           0       0        0          0         0        0        0
##      D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli
## 24           0        0       0             0        0         0
## 24.1         0        0       0             0        0         0
##      D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect
## 24            0         0        0         0        0           0
## 24.1          0         0        0         0        0           0
##      D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light D.T.devic
## 24            0        0        0          0       0         0         0
## 24.1          0        0        0          0       0         0         0
##      D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 24           0        0          0        0        0        0        0
## 24.1         0        0          0        0        0        0        0
##      D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come
## 24           0        0        0           0         0        0        0
## 24.1         0        0        0           0         0        0        0
##      D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 24           0          0         0          0            0        0
## 24.1         0          0         0          0            0        0
##      D.T.refurbish D.T.show D.T.shape D.T.read D.T.test D.T.pictur
## 24               0        0         0        0        0          0
## 24.1             0        0         0        0        0          0
##      D.T.bare D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag
## 24          0         0        0       0        0           0          0
## 24.1        0         0        0       0        0           0          0
##      D.T.mini D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding
## 24          0          0          0       0       0             0        0
## 24.1        0          0          0       0       0             0        0
##      D.T.near D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi
## 24          0         0         0        0           0         0        0
## 24.1        0         0         0        0           0         0        0
##      D.T.order D.T.protector D.T.kept D.T.right D.T.previous D.T.button
## 24           0             0        0         0            0          0
## 24.1         0             0        0         0            0          0
##      D.T.alway D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag
## 24           0           0        0       0       0        0          0
## 24.1         0           0        0       0       0        0          0
##      D.T.free D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession
## 24          0        0        0        0        0              0
## 24.1        0        0        0        0        0              0
##      D.T.overal D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail
## 24            0          0         0        0         0       0          0
## 24.1          0          0         0        0         0       0          0
##      D.T.bodi D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur
## 24          0          0         0         0           0              0
## 24.1        0          0         0         0           0              0
##      D.T.side D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect
## 24          0           0        0        0       0           0
## 24.1        0           0        0        0       0           0
##      D.T.heavili D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset
## 24             0            0       0        0         0           0
## 24.1           0            0       0        0         0           0
##      D.T.upper D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag
## 24           0           0          0        0           0        0
## 24.1         0           0          0        0           0        0
##      D.T.qualiti D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur
## 24             0         0       0         0           0          0
## 24.1           0         0       0         0           0          0
##      D.T.technician D.T.super D.T.expect D.T.sync D.T.speaker D.T.name
## 24                0         0          0        0           0        0
## 24.1              0         0          0        0           0        0
##      D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els D.T.stylus
## 24            0         0           0         0       0          0
## 24.1          0         0           0         0       0          0
##      D.T.corpor D.T.intro D.T.higher D.T.beetl D.T.defens D.T.disclaim
## 24            0         0          0         0          0            0
## 24.1          0         0          0         0          0            0
##      D.T.essenti D.T.final D.T.repeat. D.P.http D.P.mini D.P.air D.P.black
## 24             0         0           0        0        0       0         0
## 24.1           0         0           0        0        0       0         0
##      D.P.white D.P.gold D.P.spacegray
## 24           0        0             0
## 24.1         0        0             0
## [1] "min distance(0.0000) pair:"
##    UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 24    10024         N          Unknown#0                   0       0
## 66    10066         N          Unknown#0                   0       0
##    D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 24           0       0        0          0         0        0        0
## 66           0       0        0          0         0        0        0
##    D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli D.T.cosmet
## 24         0        0       0             0        0         0          0
## 66         0        0       0             0        0         0          0
##    D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock
## 24         0        0         0        0           0          0        0
## 66         0        0         0        0           0          0        0
##    D.T.case D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back
## 24        0          0       0         0         0         0        0
## 66        0          0       0         0         0         0        0
##    D.T.origin D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will
## 24          0        0        0        0        0         0        0
## 66          0        0        0        0        0         0        0
##    D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 24        0           0         0        0        0         0          0
## 66        0           0         0        0        0         0          0
##    D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 24         0          0            0        0             0        0
## 66         0          0            0        0             0        0
##    D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 24         0        0        0          0        0         0        0
## 66         0        0        0          0        0         0        0
##    D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect D.T.normal
## 24       0        0           0          0        0          0          0
## 66       0        0           0          0        0          0          0
##    D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo
## 24       0       0             0        0        0         0         0
## 66       0       0             0        0        0         0         0
##    D.T.tear D.T.display D.T.minim D.T.wifi D.T.order D.T.protector
## 24        0           0         0        0         0             0
## 66        0           0         0        0         0             0
##    D.T.kept D.T.right D.T.previous D.T.button D.T.alway D.T.contact
## 24        0         0            0          0         0           0
## 66        0         0            0          0         0           0
##    D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc D.T.imei
## 24        0       0       0        0          0        0        0        0
## 66        0       0       0        0          0        0        0        0
##    D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail D.T.refer
## 24        0        0              0          0          0         0
## 66        0        0              0          0          0         0
##    D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller D.T.activ
## 24        0         0       0          0        0          0         0
## 66        0         0       0          0        0          0         0
##    D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi D.T.ship
## 24         0           0              0        0           0        0
## 66         0           0              0        0           0        0
##    D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non D.T.geek
## 24        0       0           0           0            0       0        0
## 66        0       0           0           0            0       0        0
##    D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll D.T.must
## 24         0           0         0           0          0        0
## 66         0           0         0           0          0        0
##    D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 24           0        0           0         0       0         0
## 66           0        0           0         0       0         0
##    D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 24           0          0              0         0          0        0
## 66           0          0              0         0          0        0
##    D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els
## 24           0        0          0         0           0         0       0
## 66           0        0          0         0           0         0       0
##    D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl D.T.defens
## 24          0          0         0          0         0          0
## 66          0          0         0          0         0          0
##    D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http D.P.mini
## 24            0           0         0           0        0        0
## 66            0           0         0           0        0        0
##    D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 24       0         0         0        0             0
## 66       0         0         0        0             0

## [1] "Category: Unknown#1"
## [1] "max distance(1.0000) pair:"
##     UniqueID sold.fctr prdl.my.descr.fctr
## 5      10005         N          Unknown#1
## 130    10130         Y          Unknown#1
##                                                                                                descr.my
## 5   Please feel free to buy. All product have been thoroughly inspected, cleaned and tested to be 100% 
## 130                                                                   New - Open Box. Charger included.
##     D.T.condit D.T.use D.T.scratch   D.T.new D.T.good D.T.screen D.T.great
## 5            0       0           0 0.0000000        0          0         0
## 130          0       0           0 0.8180361        0          0         0
##     D.T.ipad D.T.work D.T.excel D.T.like   D.T.box D.T.function. D.T.item
## 5          0        0         0        0 0.0000000             0        0
## 130        0        0         0        0 0.9188446             0        0
##     D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect
## 5           0          0         0        0         0        0           0
## 130         0          0         0        0         0        0           0
##     D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light D.T.devic
## 5     0.000000        0        0          0       0         0         0
## 130   1.000109        0        0          0       0         0         0
##     D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 5   0.5309493        0          0        0        0        0  0.00000
## 130 0.0000000        0          0        0        0        0  1.18323
##     D.T.clean D.T.will D.T.appl D.T.charger D.T.damag  D.T.X100 D.T.come
## 5   0.5983265        0        0    0.000000         0 0.5375583        0
## 130 0.0000000        0        0    1.225531         0 0.0000000        0
##     D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 5           0          0         0          0            0        0
## 130         0          0         0          0            0        0
##     D.T.refurbish D.T.show D.T.shape D.T.read  D.T.test D.T.pictur
## 5               0        0         0        0 0.6166129          0
## 130             0        0         0        0 0.0000000          0
##     D.T.bare D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag
## 5          0         0        0       0        0           0          0
## 130        0         0        0       0        0           0          0
##     D.T.mini D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding
## 5          0          0          0       0       0             0        0
## 130        0          0          0       0       0             0        0
##     D.T.near D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi
## 5          0         0         0        0           0         0        0
## 130        0         0         0        0           0         0        0
##     D.T.order D.T.protector D.T.kept D.T.right D.T.previous D.T.button
## 5           0             0        0         0            0          0
## 130         0             0        0         0            0          0
##     D.T.alway D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag
## 5           0           0        0       0       0        0          0
## 130         0           0        0       0       0        0          0
##      D.T.free D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession
## 5   0.7127655        0        0        0        0              0
## 130 0.0000000        0        0        0        0              0
##     D.T.overal D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail
## 5            0          0         0        0         0       0          0
## 130          0          0         0        0         0       0          0
##     D.T.bodi D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur
## 5          0          0         0         0           0              0
## 130        0          0         0         0           0              0
##     D.T.side D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili
## 5          0           0        0        0       0   0.8205658           0
## 130        0           0        0        0       0   0.0000000           0
##     D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset D.T.upper
## 5              0       0        0         0           0         0
## 130            0       0        0         0           0         0
##     D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag D.T.qualiti
## 5             0          0        0           0        0           0
## 130           0          0        0           0        0           0
##     D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur D.T.technician
## 5           0       0         0           0          0              0
## 130         0       0         0           0          0              0
##     D.T.super D.T.expect D.T.sync D.T.speaker D.T.name D.T.lightn
## 5           0          0        0           0        0          0
## 130         0          0        0           0        0          0
##     D.T.X2016 D.T.passcod D.T.money D.T.els D.T.stylus D.T.corpor
## 5           0           0         0       0          0          0
## 130         0           0         0       0          0          0
##     D.T.intro D.T.higher D.T.beetl D.T.defens D.T.disclaim D.T.essenti
## 5           0          0         0          0            0           0
## 130         0          0         0          0            0           0
##     D.T.final D.T.repeat. D.P.http D.P.mini D.P.air D.P.black D.P.white
## 5           0           0        0        0       0         0         0
## 130         0           0        0        0       0         0         0
##     D.P.gold D.P.spacegray
## 5          0             0
## 130        0             0
## [1] "min distance(-0.0000) pair:"
##      UniqueID sold.fctr prdl.my.descr.fctr
## 244     10244         N          Unknown#1
## 1293    11294         N          Unknown#1
##                                                                                                   descr.my
## 244  Sync/ Charge cable included.  Unit is in perfect working order with only minimal scuffs.  No earbuds 
## 1293 Sync/ Charge cable included.  Unit is in perfect working order with only minimal scuffs.  No earbuds 
##      D.T.condit D.T.use D.T.scratch D.T.new D.T.good D.T.screen D.T.great
## 244           0       0           0       0        0          0         0
## 1293          0       0           0       0        0          0         0
##      D.T.ipad D.T.work D.T.excel D.T.like D.T.box D.T.function. D.T.item
## 244         0 0.340566         0        0       0             0        0
## 1293        0 0.340566         0        0       0             0        0
##      D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear
## 244          0          0         0        0         0        0
## 1293         0          0         0        0         0        0
##      D.T.perfect D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light
## 244    0.5085657  0.4545948        0        0          0       0         0
## 1293   0.5085657  0.4545948        0        0          0       0         0
##      D.T.devic D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign
## 244          0         0        0          0        0        0        0
## 1293         0         0        0          0        0        0        0
##      D.T.open D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100
## 244         0         0        0        0           0         0        0
## 1293        0         0        0        0           0         0        0
##      D.T.come D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript
## 244         0 0.5439332          0         0          0            0
## 1293        0 0.5439332          0         0          0            0
##       D.T.unit D.T.refurbish D.T.show D.T.shape D.T.read D.T.test
## 244  0.5503322             0        0         0        0        0
## 1293 0.5503322             0        0         0        0        0
##      D.T.pictur D.T.bare D.T.brand D.T.list D.T.may D.T.mark D.T.blemish
## 244           0        0         0        0       0        0           0
## 1293          0        0         0        0       0        0           0
##      D.T.packag D.T.mini D.T.affect D.T.normal D.T.tab D.T.top
## 244           0        0          0          0       0       0
## 1293          0        0          0          0       0       0
##      D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo D.T.tear
## 244              0        0        0         0         0        0
## 1293             0        0        0         0         0        0
##      D.T.display D.T.minim D.T.wifi D.T.order D.T.protector D.T.kept
## 244            0 0.5971116        0 0.6348423             0        0
## 1293           0 0.5971116        0 0.6348423             0        0
##      D.T.right D.T.previous D.T.button D.T.alway D.T.contact D.T.fair
## 244          0            0          0         0           0        0
## 1293         0            0          0         0           0        0
##      D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc D.T.imei
## 244        0       0        0          0        0        0        0
## 1293       0       0        0          0        0        0        0
##       D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail D.T.refer
## 244  0.6550598        0              0          0          0         0
## 1293 0.6550598        0              0          0          0         0
##      D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller D.T.activ
## 244         0         0       0          0        0          0         0
## 1293        0         0       0          0        0          0         0
##      D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi D.T.ship
## 244          0           0              0        0           0        0
## 1293         0           0              0        0           0        0
##      D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non
## 244         0       0           0           0            0       0
## 1293        0       0           0           0            0       0
##      D.T.geek D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll
## 244         0         0           0         0           0          0
## 1293        0         0           0         0           0          0
##      D.T.must D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 244         0           0        0           0         0       0         0
## 1293        0           0        0           0         0       0         0
##      D.T.correct D.T.featur D.T.technician D.T.super D.T.expect  D.T.sync
## 244            0          0              0         0          0 0.8230595
## 1293           0          0              0         0          0 0.8230595
##      D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money
## 244            0        0          0         0           0         0
## 1293           0        0          0         0           0         0
##      D.T.els D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl
## 244        0          0          0         0          0         0
## 1293       0          0          0         0          0         0
##      D.T.defens D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http
## 244           0            0           0         0           0        0
## 1293          0            0           0         0           0        0
##      D.P.mini D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 244         0       0         0         0        0             0
## 1293        0       0         0         0        0             0

## [1] "Category: iPad 1#0"
## [1] "max distance(0.0000) pair:"
##     UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 9      10009         Y           iPad 1#0                   0       0
## 9.1    10009         Y           iPad 1#0                   0       0
##     D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 9             0       0        0          0         0        0        0
## 9.1           0       0        0          0         0        0        0
##     D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli D.T.cosmet
## 9           0        0       0             0        0         0          0
## 9.1         0        0       0             0        0         0          0
##     D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock
## 9           0        0         0        0           0          0        0
## 9.1         0        0         0        0           0          0        0
##     D.T.case D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back
## 9          0          0       0         0         0         0        0
## 9.1        0          0       0         0         0         0        0
##     D.T.origin D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will
## 9            0        0        0        0        0         0        0
## 9.1          0        0        0        0        0         0        0
##     D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 9          0           0         0        0        0         0          0
## 9.1        0           0         0        0        0         0          0
##     D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 9           0          0            0        0             0        0
## 9.1         0          0            0        0             0        0
##     D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 9           0        0        0          0        0         0        0
## 9.1         0        0        0          0        0         0        0
##     D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect D.T.normal
## 9         0        0           0          0        0          0          0
## 9.1       0        0           0          0        0          0          0
##     D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo
## 9         0       0             0        0        0         0         0
## 9.1       0       0             0        0        0         0         0
##     D.T.tear D.T.display D.T.minim D.T.wifi D.T.order D.T.protector
## 9          0           0         0        0         0             0
## 9.1        0           0         0        0         0             0
##     D.T.kept D.T.right D.T.previous D.T.button D.T.alway D.T.contact
## 9          0         0            0          0         0           0
## 9.1        0         0            0          0         0           0
##     D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc
## 9          0       0       0        0          0        0        0
## 9.1        0       0       0        0          0        0        0
##     D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail
## 9          0        0        0              0          0          0
## 9.1        0        0        0              0          0          0
##     D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller
## 9           0        0         0       0          0        0          0
## 9.1         0        0         0       0          0        0          0
##     D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi
## 9           0         0           0              0        0           0
## 9.1         0         0           0              0        0           0
##     D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non
## 9          0        0       0           0           0            0       0
## 9.1        0        0       0           0           0            0       0
##     D.T.geek D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll
## 9          0         0           0         0           0          0
## 9.1        0         0           0         0           0          0
##     D.T.must D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 9          0           0        0           0         0       0         0
## 9.1        0           0        0           0         0       0         0
##     D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 9             0          0              0         0          0        0
## 9.1           0          0              0         0          0        0
##     D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money
## 9             0        0          0         0           0         0
## 9.1           0        0          0         0           0         0
##     D.T.els D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl
## 9         0          0          0         0          0         0
## 9.1       0          0          0         0          0         0
##     D.T.defens D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http
## 9            0            0           0         0           0        0
## 9.1          0            0           0         0           0        0
##     D.P.mini D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 9          0       0         0         0        0             0
## 9.1        0       0         0         0        0             0
## [1] "min distance(0.0000) pair:"
##    UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 9     10009         Y           iPad 1#0                   0       0
## 12    10012         N           iPad 1#0                   0       0
##    D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 9            0       0        0          0         0        0        0
## 12           0       0        0          0         0        0        0
##    D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli D.T.cosmet
## 9          0        0       0             0        0         0          0
## 12         0        0       0             0        0         0          0
##    D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock
## 9          0        0         0        0           0          0        0
## 12         0        0         0        0           0          0        0
##    D.T.case D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back
## 9         0          0       0         0         0         0        0
## 12        0          0       0         0         0         0        0
##    D.T.origin D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will
## 9           0        0        0        0        0         0        0
## 12          0        0        0        0        0         0        0
##    D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 9         0           0         0        0        0         0          0
## 12        0           0         0        0        0         0          0
##    D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 9          0          0            0        0             0        0
## 12         0          0            0        0             0        0
##    D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 9          0        0        0          0        0         0        0
## 12         0        0        0          0        0         0        0
##    D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect D.T.normal
## 9        0        0           0          0        0          0          0
## 12       0        0           0          0        0          0          0
##    D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo
## 9        0       0             0        0        0         0         0
## 12       0       0             0        0        0         0         0
##    D.T.tear D.T.display D.T.minim D.T.wifi D.T.order D.T.protector
## 9         0           0         0        0         0             0
## 12        0           0         0        0         0             0
##    D.T.kept D.T.right D.T.previous D.T.button D.T.alway D.T.contact
## 9         0         0            0          0         0           0
## 12        0         0            0          0         0           0
##    D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc D.T.imei
## 9         0       0       0        0          0        0        0        0
## 12        0       0       0        0          0        0        0        0
##    D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail D.T.refer
## 9         0        0              0          0          0         0
## 12        0        0              0          0          0         0
##    D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller D.T.activ
## 9         0         0       0          0        0          0         0
## 12        0         0       0          0        0          0         0
##    D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi D.T.ship
## 9          0           0              0        0           0        0
## 12         0           0              0        0           0        0
##    D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non D.T.geek
## 9         0       0           0           0            0       0        0
## 12        0       0           0           0            0       0        0
##    D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll D.T.must
## 9          0           0         0           0          0        0
## 12         0           0         0           0          0        0
##    D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 9            0        0           0         0       0         0
## 12           0        0           0         0       0         0
##    D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 9            0          0              0         0          0        0
## 12           0          0              0         0          0        0
##    D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els
## 9            0        0          0         0           0         0       0
## 12           0        0          0         0           0         0       0
##    D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl D.T.defens
## 9           0          0         0          0         0          0
## 12          0          0         0          0         0          0
##    D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http D.P.mini
## 9             0           0         0           0        0        0
## 12            0           0         0           0        0        0
##    D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 9        0         0         0        0             0
## 12       0         0         0        0             0

## [1] "Category: iPad 1#1"
## [1] "max distance(1.0000) pair:"
##    UniqueID sold.fctr prdl.my.descr.fctr
## 13    10013         Y           iPad 1#1
## 68    10068         Y           iPad 1#1
##                                                                                            descr.my
## 13 GOOD CONDITION. CLEAN ICLOUD. NO LOCKS. CLEAN IMEI. This tablet has been fully tested and works 
## 68    14 Days Warranty. Product has some sign of wear and scratches from previous use. (Please see 
##    D.T.condit   D.T.use D.T.scratch D.T.new  D.T.good D.T.screen D.T.great
## 13  0.2193351 0.0000000   0.0000000       0 0.3412301          0         0
## 68  0.0000000 0.3190707   0.3215711       0 0.0000000          0         0
##    D.T.ipad D.T.work D.T.excel D.T.like D.T.box D.T.function. D.T.item
## 13        0 0.340566         0        0       0             0        0
## 68        0 0.000000         0        0       0             0        0
##    D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack  D.T.wear D.T.perfect
## 13 0.4469228          0         0        0         0 0.0000000           0
## 68 0.0000000          0         0        0         0 0.4717371           0
##    D.T.includ  D.T.lock D.T.case D.T.icloud   D.T.see D.T.light D.T.devic
## 13          0 0.5408471        0  0.5062025 0.0000000         0         0
## 68          0 0.0000000        0  0.0000000 0.5647662         0         0
##    D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous  D.T.sign D.T.open
## 13 0.0000000        0          0        0        0 0.0000000        0
## 68 0.5309493        0          0        0        0 0.5108796        0
##    D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come
## 13  1.087866        0        0           0         0        0        0
## 68  0.000000        0        0           0         0        0        0
##    D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 13         0          0         0          0            0        0
## 68         0          0         0          0            0        0
##    D.T.refurbish D.T.show D.T.shape D.T.read  D.T.test D.T.pictur D.T.bare
## 13             0        0         0        0 0.5605572          0        0
## 68             0        0         0        0 0.0000000          0        0
##    D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini
## 13         0        0       0        0           0          0        0
## 68         0        0       0        0           0          0        0
##    D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding D.T.near
## 13          0          0       0       0             0        0        0
## 68          0          0       0       0             0        0        0
##    D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi D.T.order
## 13         0         0        0           0         0        0         0
## 68         0         0        0           0         0        0         0
##    D.T.protector D.T.kept D.T.right D.T.previous D.T.button D.T.alway
## 13             0        0         0    0.0000000          0         0
## 68             0        0         0    0.6852021          0         0
##    D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free
## 13           0        0       0       0        0          0        0
## 68           0        0       0       0        0          0        0
##    D.T.sinc  D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal
## 13        0 0.6625563        0        0              0          0
## 68        0 0.0000000        0        0              0          0
##    D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi
## 13          0         0        0         0       0          0        0
## 68          0         0        0         0       0          0        0
##    D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side
## 13          0         0         0           0              0        0
## 68          0         0         0           0              0        0
##    D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili
## 13           0        0        0       0           0           0
## 68           0        0        0       0           0           0
##    D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset D.T.upper
## 13            0       0        0         0           0         0
## 68            0       0        0         0           0         0
##    D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag D.T.qualiti
## 13           0          0        0           0        0           0
## 68           0          0        0           0        0           0
##    D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur D.T.technician
## 13         0       0         0           0          0              0
## 68         0       0         0           0          0              0
##    D.T.super D.T.expect D.T.sync D.T.speaker D.T.name D.T.lightn D.T.X2016
## 13         0          0        0           0        0          0         0
## 68         0          0        0           0        0          0         0
##    D.T.passcod D.T.money D.T.els D.T.stylus D.T.corpor D.T.intro
## 13           0         0       0          0          0         0
## 68           0         0       0          0          0         0
##    D.T.higher D.T.beetl D.T.defens D.T.disclaim D.T.essenti D.T.final
## 13          0         0          0            0           0         0
## 68          0         0          0            0           0         0
##    D.T.repeat. D.P.http D.P.mini D.P.air D.P.black D.P.white D.P.gold
## 13           0        0        0       0         0         0        0
## 68           0        0        0       0         0         0        0
##    D.P.spacegray
## 13             0
## 68             0
## [1] "min distance(-0.0000) pair:"
##      UniqueID sold.fctr prdl.my.descr.fctr
## 1046    11046         N           iPad 1#1
## 2371    12373      <NA>           iPad 1#1
##                                                                                             descr.my
## 1046 This TAB is in average condition with some scratches on the housing or screen (does not affect 
## 2371 This TAB is in average condition with some scratches on the housing or screen (does not affect 
##      D.T.condit D.T.use D.T.scratch D.T.new D.T.good D.T.screen D.T.great
## 1046  0.3446695       0   0.4593873       0        0  0.5201247         0
## 2371  0.3446695       0   0.4593873       0        0  0.5201247         0
##      D.T.ipad D.T.work D.T.excel D.T.like D.T.box D.T.function. D.T.item
## 1046        0        0         0        0       0             0        0
## 2371        0        0         0        0       0             0        0
##      D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear
## 1046         0          0         0        0         0        0
## 2371         0          0         0        0         0        0
##      D.T.perfect D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light
## 1046           0          0        0        0          0       0         0
## 2371           0          0        0        0          0       0         0
##      D.T.devic D.T.pleas D.T.back D.T.origin D.T.dent  D.T.hous D.T.sign
## 1046         0         0        0          0        0 0.7436654        0
## 2371         0         0        0          0        0 0.7436654        0
##      D.T.open D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100
## 1046        0         0        0        0           0         0        0
## 2371        0         0        0        0           0         0        0
##      D.T.come D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript
## 1046        0         0          0         0          0            0
## 2371        0         0          0         0          0            0
##      D.T.unit D.T.refurbish D.T.show D.T.shape D.T.read D.T.test
## 1046        0             0        0         0        0        0
## 2371        0             0        0         0        0        0
##      D.T.pictur D.T.bare D.T.brand D.T.list D.T.may D.T.mark D.T.blemish
## 1046          0        0         0        0       0        0           0
## 2371          0        0         0        0       0        0           0
##      D.T.packag D.T.mini D.T.affect D.T.normal   D.T.tab D.T.top
## 1046          0        0  0.9383182          0 0.9458136       0
## 2371          0        0  0.9383182          0 0.9458136       0
##      D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo D.T.tear
## 1046             0        0        0         0         0        0
## 2371             0        0        0         0         0        0
##      D.T.display D.T.minim D.T.wifi D.T.order D.T.protector D.T.kept
## 1046           0         0        0         0             0        0
## 2371           0         0        0         0             0        0
##      D.T.right D.T.previous D.T.button D.T.alway D.T.contact D.T.fair
## 1046         0            0          0         0           0        0
## 2371         0            0          0         0           0        0
##      D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc D.T.imei
## 1046       0       0        0   1.066956        0        0        0
## 2371       0       0        0   1.066956        0        0        0
##      D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail D.T.refer
## 1046        0        0              0          0          0         0
## 2371        0        0              0          0          0         0
##      D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller D.T.activ
## 1046        0         0       0          0        0          0         0
## 2371        0         0       0          0        0          0         0
##      D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi D.T.ship
## 1046         0           0              0        0           0        0
## 2371         0           0              0        0           0        0
##      D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non
## 1046        0       0           0           0            0       0
## 2371        0       0           0           0            0       0
##      D.T.geek D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll
## 1046        0         0           0         0           0          0
## 2371        0         0           0         0           0          0
##      D.T.must D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 1046        0           0        0           0         0       0         0
## 2371        0           0        0           0         0       0         0
##      D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 1046           0          0              0         0          0        0
## 2371           0          0              0         0          0        0
##      D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money
## 1046           0        0          0         0           0         0
## 2371           0        0          0         0           0         0
##      D.T.els D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl
## 1046       0          0          0         0          0         0
## 2371       0          0          0         0          0         0
##      D.T.defens D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http
## 1046          0            0           0         0           0        0
## 2371          0            0           0         0           0        0
##      D.P.mini D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 1046        0       0         0         0        0             0
## 2371        0       0         0         0        0             0

## [1] "Category: iPad 2#0"
## [1] "max distance(0.0000) pair:"
##      UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 39      10039         N           iPad 2#0                   0       0
## 39.1    10039         N           iPad 2#0                   0       0
##      D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 39             0       0        0          0         0        0        0
## 39.1           0       0        0          0         0        0        0
##      D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli
## 39           0        0       0             0        0         0
## 39.1         0        0       0             0        0         0
##      D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect
## 39            0         0        0         0        0           0
## 39.1          0         0        0         0        0           0
##      D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light D.T.devic
## 39            0        0        0          0       0         0         0
## 39.1          0        0        0          0       0         0         0
##      D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 39           0        0          0        0        0        0        0
## 39.1         0        0          0        0        0        0        0
##      D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come
## 39           0        0        0           0         0        0        0
## 39.1         0        0        0           0         0        0        0
##      D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 39           0          0         0          0            0        0
## 39.1         0          0         0          0            0        0
##      D.T.refurbish D.T.show D.T.shape D.T.read D.T.test D.T.pictur
## 39               0        0         0        0        0          0
## 39.1             0        0         0        0        0          0
##      D.T.bare D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag
## 39          0         0        0       0        0           0          0
## 39.1        0         0        0       0        0           0          0
##      D.T.mini D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding
## 39          0          0          0       0       0             0        0
## 39.1        0          0          0       0       0             0        0
##      D.T.near D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi
## 39          0         0         0        0           0         0        0
## 39.1        0         0         0        0           0         0        0
##      D.T.order D.T.protector D.T.kept D.T.right D.T.previous D.T.button
## 39           0             0        0         0            0          0
## 39.1         0             0        0         0            0          0
##      D.T.alway D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag
## 39           0           0        0       0       0        0          0
## 39.1         0           0        0       0       0        0          0
##      D.T.free D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession
## 39          0        0        0        0        0              0
## 39.1        0        0        0        0        0              0
##      D.T.overal D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail
## 39            0          0         0        0         0       0          0
## 39.1          0          0         0        0         0       0          0
##      D.T.bodi D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur
## 39          0          0         0         0           0              0
## 39.1        0          0         0         0           0              0
##      D.T.side D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect
## 39          0           0        0        0       0           0
## 39.1        0           0        0        0       0           0
##      D.T.heavili D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset
## 39             0            0       0        0         0           0
## 39.1           0            0       0        0         0           0
##      D.T.upper D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag
## 39           0           0          0        0           0        0
## 39.1         0           0          0        0           0        0
##      D.T.qualiti D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur
## 39             0         0       0         0           0          0
## 39.1           0         0       0         0           0          0
##      D.T.technician D.T.super D.T.expect D.T.sync D.T.speaker D.T.name
## 39                0         0          0        0           0        0
## 39.1              0         0          0        0           0        0
##      D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els D.T.stylus
## 39            0         0           0         0       0          0
## 39.1          0         0           0         0       0          0
##      D.T.corpor D.T.intro D.T.higher D.T.beetl D.T.defens D.T.disclaim
## 39            0         0          0         0          0            0
## 39.1          0         0          0         0          0            0
##      D.T.essenti D.T.final D.T.repeat. D.P.http D.P.mini D.P.air D.P.black
## 39             0         0           0        0        0       0         0
## 39.1           0         0           0        0        0       0         0
##      D.P.white D.P.gold D.P.spacegray
## 39           0        0             0
## 39.1         0        0             0
## [1] "min distance(0.0000) pair:"
##    UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 39    10039         N           iPad 2#0                   0       0
## 73    10073         Y           iPad 2#0                   0       0
##    D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 39           0       0        0          0         0        0        0
## 73           0       0        0          0         0        0        0
##    D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli D.T.cosmet
## 39         0        0       0             0        0         0          0
## 73         0        0       0             0        0         0          0
##    D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock
## 39         0        0         0        0           0          0        0
## 73         0        0         0        0           0          0        0
##    D.T.case D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back
## 39        0          0       0         0         0         0        0
## 73        0          0       0         0         0         0        0
##    D.T.origin D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will
## 39          0        0        0        0        0         0        0
## 73          0        0        0        0        0         0        0
##    D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 39        0           0         0        0        0         0          0
## 73        0           0         0        0        0         0          0
##    D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 39         0          0            0        0             0        0
## 73         0          0            0        0             0        0
##    D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 39         0        0        0          0        0         0        0
## 73         0        0        0          0        0         0        0
##    D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect D.T.normal
## 39       0        0           0          0        0          0          0
## 73       0        0           0          0        0          0          0
##    D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo
## 39       0       0             0        0        0         0         0
## 73       0       0             0        0        0         0         0
##    D.T.tear D.T.display D.T.minim D.T.wifi D.T.order D.T.protector
## 39        0           0         0        0         0             0
## 73        0           0         0        0         0             0
##    D.T.kept D.T.right D.T.previous D.T.button D.T.alway D.T.contact
## 39        0         0            0          0         0           0
## 73        0         0            0          0         0           0
##    D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc D.T.imei
## 39        0       0       0        0          0        0        0        0
## 73        0       0       0        0          0        0        0        0
##    D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail D.T.refer
## 39        0        0              0          0          0         0
## 73        0        0              0          0          0         0
##    D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller D.T.activ
## 39        0         0       0          0        0          0         0
## 73        0         0       0          0        0          0         0
##    D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi D.T.ship
## 39         0           0              0        0           0        0
## 73         0           0              0        0           0        0
##    D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non D.T.geek
## 39        0       0           0           0            0       0        0
## 73        0       0           0           0            0       0        0
##    D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll D.T.must
## 39         0           0         0           0          0        0
## 73         0           0         0           0          0        0
##    D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 39           0        0           0         0       0         0
## 73           0        0           0         0       0         0
##    D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 39           0          0              0         0          0        0
## 73           0          0              0         0          0        0
##    D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els
## 39           0        0          0         0           0         0       0
## 73           0        0          0         0           0         0       0
##    D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl D.T.defens
## 39          0          0         0          0         0          0
## 73          0          0         0          0         0          0
##    D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http D.P.mini
## 39            0           0         0           0        0        0
## 73            0           0         0           0        0        0
##    D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 39       0         0         0        0             0
## 73       0         0         0        0             0

## [1] "Category: iPad 2#1"
## [1] "max distance(1.0000) pair:"
##   UniqueID sold.fctr prdl.my.descr.fctr
## 1    10001         N           iPad 2#1
## 2    10002         Y           iPad 2#1
##                                                                                               descr.my
## 1                                                        iPad is in 8.5+ out of 10 cosmetic condition!
## 2 Previously used, please read description. May show signs of use such as scratches to the screen and 
##   D.T.condit   D.T.use D.T.scratch D.T.new D.T.good D.T.screen D.T.great
## 1  0.8042288 0.0000000   0.0000000       0        0  0.0000000         0
## 2  0.0000000 0.5801286   0.2923374       0        0  0.3309884         0
##   D.T.ipad D.T.work D.T.excel D.T.like D.T.box D.T.function. D.T.item
## 1 1.172534        0         0        0       0             0        0
## 2 0.000000        0         0        0       0             0        0
##   D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect
## 1         0   1.501739         0        0         0        0           0
## 2         0   0.000000         0        0         0        0           0
##   D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light D.T.devic
## 1          0        0        0          0       0         0         0
## 2          0        0        0          0       0         0         0
##   D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 1 0.0000000        0          0        0        0 0.000000        0
## 2 0.4826812        0          0        0        0 0.464436        0
##   D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come
## 1         0        0        0           0         0        0        0
## 2         0        0        0           0         0        0        0
##   D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 1         0          0         0          0    0.0000000        0
## 2         0          0         0          0    0.5755626        0
##   D.T.refurbish  D.T.show D.T.shape  D.T.read D.T.test D.T.pictur D.T.bare
## 1             0 0.0000000         0 0.0000000        0          0        0
## 2             0 0.5184688         0 0.5837624        0          0        0
##   D.T.brand D.T.list   D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini
## 1         0        0 0.0000000        0           0          0        0
## 2         0        0 0.5570595        0           0          0        0
##   D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding D.T.near
## 1          0          0       0       0             0        0        0
## 2          0          0       0       0             0        0        0
##   D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi D.T.order
## 1         0         0        0           0         0        0         0
## 2         0         0        0           0         0        0         0
##   D.T.protector D.T.kept D.T.right D.T.previous D.T.button D.T.alway
## 1             0        0         0     0.000000          0         0
## 2             0        0         0     0.622911          0         0
##   D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free
## 1           0        0       0       0        0          0        0
## 2           0        0       0       0        0          0        0
##   D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail
## 1        0        0        0        0              0          0          0
## 2        0        0        0        0              0          0          0
##   D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller
## 1         0        0         0       0          0        0          0
## 2         0        0         0       0          0        0          0
##   D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi
## 1         0         0           0              0        0           0
## 2         0         0           0              0        0           0
##   D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non
## 1        0        0       0           0           0            0       0
## 2        0        0       0           0           0            0       0
##   D.T.geek D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll D.T.must
## 1        0         0           0         0           0          0        0
## 2        0         0           0         0           0          0        0
##   D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least D.T.correct
## 1           0        0           0         0       0         0           0
## 2           0        0           0         0       0         0           0
##   D.T.featur D.T.technician D.T.super D.T.expect D.T.sync D.T.speaker
## 1          0              0         0          0        0           0
## 2          0              0         0          0        0           0
##   D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els D.T.stylus
## 1        0          0         0           0         0       0          0
## 2        0          0         0           0         0       0          0
##   D.T.corpor D.T.intro D.T.higher D.T.beetl D.T.defens D.T.disclaim
## 1          0         0          0         0          0            0
## 2          0         0          0         0          0            0
##   D.T.essenti D.T.final D.T.repeat. D.P.http D.P.mini D.P.air D.P.black
## 1           0         0           0        0        0       0         0
## 2           0         0           0        0        0       0         0
##   D.P.white D.P.gold D.P.spacegray
## 1         0        0             0
## 2         0        0             0
## [1] "min distance(-0.0000) pair:"
##      UniqueID sold.fctr prdl.my.descr.fctr
## 158     10158         N           iPad 2#1
## 1196    11196         Y           iPad 2#1
##                                                                                                    descr.my
## 158      This iPad 2 is used and is in good working order. It has scuffs/ scratches from general use. Note 
## 1196 This iPad 2 is used and is in good working order. It has scuffs/ scratches from general use and there 
##      D.T.condit   D.T.use D.T.scratch D.T.new  D.T.good D.T.screen
## 158           0 0.6381414   0.3215711       0 0.3753531          0
## 1196          0 0.7090460   0.3573012       0 0.4170590          0
##      D.T.great  D.T.ipad  D.T.work D.T.excel D.T.like D.T.box
## 158          0 0.3517602 0.3746226         0        0       0
## 1196         0 0.3908446 0.4162473         0        0       0
##      D.T.function. D.T.item D.T.fulli D.T.cosmet D.T.minor D.T.mint
## 158              0        0         0          0         0        0
## 1196             0        0         0          0         0        0
##      D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock D.T.case
## 158          0        0           0          0        0        0
## 1196         0        0           0          0        0        0
##      D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back D.T.origin
## 158           0       0         0         0         0        0          0
## 1196          0       0         0         0         0        0          0
##      D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will D.T.appl
## 158         0        0        0        0         0        0        0
## 1196        0        0        0        0         0        0        0
##      D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 158            0         0        0        0 0.5983265          0
## 1196           0         0        0        0 0.6648072          0
##      D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 158          0          0            0        0             0        0
## 1196         0          0            0        0             0        0
##      D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 158          0        0        0          0        0         0        0
## 1196         0        0        0          0        0         0        0
##      D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect
## 158        0        0           0          0        0          0
## 1196       0        0           0          0        0          0
##      D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit
## 158           0       0       0             0        0        0         0
## 1196          0       0       0             0        0        0         0
##      D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi D.T.order
## 158          0        0           0         0        0 0.6983265
## 1196         0        0           0         0        0 0.7759183
##      D.T.protector D.T.kept D.T.right D.T.previous D.T.button D.T.alway
## 158              0        0         0            0          0         0
## 1196             0        0         0            0          0         0
##      D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free
## 158            0        0       0       0        0          0        0
## 1196           0        0       0       0        0          0        0
##      D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal
## 158         0        0        0        0              0          0
## 1196        0        0        0        0              0          0
##      D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi
## 158           0         0        0         0       0          0        0
## 1196          0         0        0         0       0          0        0
##      D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side
## 158           0         0         0           0              0        0
## 1196          0         0         0           0              0        0
##      D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili
## 158            0        0        0       0           0           0
## 1196           0        0        0       0           0           0
##      D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset D.T.upper
## 158             0       0        0         0           0         0
## 1196            0       0        0         0           0         0
##      D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag D.T.qualiti
## 158            0          0        0           0        0           0
## 1196           0          0        0           0        0           0
##      D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur D.T.technician
## 158          0       0         0           0          0              0
## 1196         0       0         0           0          0              0
##      D.T.super D.T.expect D.T.sync D.T.speaker D.T.name D.T.lightn
## 158          0          0        0           0        0          0
## 1196         0          0        0           0        0          0
##      D.T.X2016 D.T.passcod D.T.money D.T.els D.T.stylus D.T.corpor
## 158          0           0         0       0          0          0
## 1196         0           0         0       0          0          0
##      D.T.intro D.T.higher D.T.beetl D.T.defens D.T.disclaim D.T.essenti
## 158          0          0         0          0            0           0
## 1196         0          0         0          0            0           0
##      D.T.final D.T.repeat. D.P.http D.P.mini D.P.air D.P.black D.P.white
## 158          0           0        0        0       0         0         0
## 1196         0           0        0        0       0         0         0
##      D.P.gold D.P.spacegray
## 158         0             0
## 1196        0             0

## [1] "Category: iPad 3+#0"
## [1] "max distance(0.0000) pair:"
##     UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 3      10003         Y          iPad 3+#0                   0       0
## 3.1    10003         Y          iPad 3+#0                   0       0
##     D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 3             0       0        0          0         0        0        0
## 3.1           0       0        0          0         0        0        0
##     D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli D.T.cosmet
## 3           0        0       0             0        0         0          0
## 3.1         0        0       0             0        0         0          0
##     D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock
## 3           0        0         0        0           0          0        0
## 3.1         0        0         0        0           0          0        0
##     D.T.case D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back
## 3          0          0       0         0         0         0        0
## 3.1        0          0       0         0         0         0        0
##     D.T.origin D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will
## 3            0        0        0        0        0         0        0
## 3.1          0        0        0        0        0         0        0
##     D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 3          0           0         0        0        0         0          0
## 3.1        0           0         0        0        0         0          0
##     D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 3           0          0            0        0             0        0
## 3.1         0          0            0        0             0        0
##     D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 3           0        0        0          0        0         0        0
## 3.1         0        0        0          0        0         0        0
##     D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect D.T.normal
## 3         0        0           0          0        0          0          0
## 3.1       0        0           0          0        0          0          0
##     D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo
## 3         0       0             0        0        0         0         0
## 3.1       0       0             0        0        0         0         0
##     D.T.tear D.T.display D.T.minim D.T.wifi D.T.order D.T.protector
## 3          0           0         0        0         0             0
## 3.1        0           0         0        0         0             0
##     D.T.kept D.T.right D.T.previous D.T.button D.T.alway D.T.contact
## 3          0         0            0          0         0           0
## 3.1        0         0            0          0         0           0
##     D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc
## 3          0       0       0        0          0        0        0
## 3.1        0       0       0        0          0        0        0
##     D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail
## 3          0        0        0              0          0          0
## 3.1        0        0        0              0          0          0
##     D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller
## 3           0        0         0       0          0        0          0
## 3.1         0        0         0       0          0        0          0
##     D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi
## 3           0         0           0              0        0           0
## 3.1         0         0           0              0        0           0
##     D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non
## 3          0        0       0           0           0            0       0
## 3.1        0        0       0           0           0            0       0
##     D.T.geek D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll
## 3          0         0           0         0           0          0
## 3.1        0         0           0         0           0          0
##     D.T.must D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 3          0           0        0           0         0       0         0
## 3.1        0           0        0           0         0       0         0
##     D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 3             0          0              0         0          0        0
## 3.1           0          0              0         0          0        0
##     D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money
## 3             0        0          0         0           0         0
## 3.1           0        0          0         0           0         0
##     D.T.els D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl
## 3         0          0          0         0          0         0
## 3.1       0          0          0         0          0         0
##     D.T.defens D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http
## 3            0            0           0         0           0        0
## 3.1          0            0           0         0           0        0
##     D.P.mini D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 3          0       0         0         0        0             0
## 3.1        0       0         0         0        0             0
## [1] "min distance(0.0000) pair:"
##    UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 3     10003         Y          iPad 3+#0                   0       0
## 10    10010         Y          iPad 3+#0                   0       0
##    D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 3            0       0        0          0         0        0        0
## 10           0       0        0          0         0        0        0
##    D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli D.T.cosmet
## 3          0        0       0             0        0         0          0
## 10         0        0       0             0        0         0          0
##    D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock
## 3          0        0         0        0           0          0        0
## 10         0        0         0        0           0          0        0
##    D.T.case D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back
## 3         0          0       0         0         0         0        0
## 10        0          0       0         0         0         0        0
##    D.T.origin D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will
## 3           0        0        0        0        0         0        0
## 10          0        0        0        0        0         0        0
##    D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 3         0           0         0        0        0         0          0
## 10        0           0         0        0        0         0          0
##    D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 3          0          0            0        0             0        0
## 10         0          0            0        0             0        0
##    D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 3          0        0        0          0        0         0        0
## 10         0        0        0          0        0         0        0
##    D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect D.T.normal
## 3        0        0           0          0        0          0          0
## 10       0        0           0          0        0          0          0
##    D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo
## 3        0       0             0        0        0         0         0
## 10       0       0             0        0        0         0         0
##    D.T.tear D.T.display D.T.minim D.T.wifi D.T.order D.T.protector
## 3         0           0         0        0         0             0
## 10        0           0         0        0         0             0
##    D.T.kept D.T.right D.T.previous D.T.button D.T.alway D.T.contact
## 3         0         0            0          0         0           0
## 10        0         0            0          0         0           0
##    D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc D.T.imei
## 3         0       0       0        0          0        0        0        0
## 10        0       0       0        0          0        0        0        0
##    D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail D.T.refer
## 3         0        0              0          0          0         0
## 10        0        0              0          0          0         0
##    D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller D.T.activ
## 3         0         0       0          0        0          0         0
## 10        0         0       0          0        0          0         0
##    D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi D.T.ship
## 3          0           0              0        0           0        0
## 10         0           0              0        0           0        0
##    D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non D.T.geek
## 3         0       0           0           0            0       0        0
## 10        0       0           0           0            0       0        0
##    D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll D.T.must
## 3          0           0         0           0          0        0
## 10         0           0         0           0          0        0
##    D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 3            0        0           0         0       0         0
## 10           0        0           0         0       0         0
##    D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 3            0          0              0         0          0        0
## 10           0          0              0         0          0        0
##    D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els
## 3            0        0          0         0           0         0       0
## 10           0        0          0         0           0         0       0
##    D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl D.T.defens
## 3           0          0         0          0         0          0
## 10          0          0         0          0         0          0
##    D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http D.P.mini
## 3             0           0         0           0        0        0
## 10            0           0         0           0        0        0
##    D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 3        0         0         0        0             0
## 10       0         0         0        0             0

## [1] "Category: iPad 3+#1"
## [1] "max distance(1.0000) pair:"
##    UniqueID sold.fctr prdl.my.descr.fctr
## 11    10011         Y          iPad 3+#1
## 37    10037         Y          iPad 3+#1
##                                                                                        descr.my
## 11 good condition, minor wear and tear on body some light scratches on screen. functions great.
## 37                                                                         Rarely ever used it.
##    D.T.condit  D.T.use D.T.scratch D.T.new  D.T.good D.T.screen D.T.great
## 11  0.2193351 0.000000   0.2923374       0 0.3412301  0.3309884 0.4008907
## 37  0.0000000 1.063569   0.0000000       0 0.0000000  0.0000000 0.0000000
##    D.T.ipad D.T.work D.T.excel D.T.like D.T.box D.T.function. D.T.item
## 11        0        0         0        0       0      0.410691        0
## 37        0        0         0        0       0      0.000000        0
##    D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack  D.T.wear D.T.perfect
## 11         0          0 0.4095653        0         0 0.4288519           0
## 37         0          0 0.0000000        0         0 0.0000000           0
##    D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light D.T.devic
## 11          0        0        0          0       0 0.4577939         0
## 37          0        0        0          0       0 0.0000000         0
##    D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 11         0        0          0        0        0        0        0
## 37         0        0          0        0        0        0        0
##    D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come
## 11         0        0        0           0         0        0        0
## 37         0        0        0           0         0        0        0
##    D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 11         0          0         0          0            0        0
## 37         0          0         0          0            0        0
##    D.T.refurbish D.T.show D.T.shape D.T.read D.T.test D.T.pictur D.T.bare
## 11             0        0         0        0        0          0        0
## 37             0        0         0        0        0          0        0
##    D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini
## 11         0        0       0        0           0          0        0
## 37         0        0       0        0           0          0        0
##    D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding D.T.near
## 11          0          0       0       0             0        0        0
## 37          0          0       0       0             0        0        0
##    D.T.digit D.T.photo  D.T.tear D.T.display D.T.minim D.T.wifi D.T.order
## 11         0         0 0.5837624           0         0        0         0
## 37         0         0 0.0000000           0         0        0         0
##    D.T.protector D.T.kept D.T.right D.T.previous D.T.button D.T.alway
## 11             0        0         0            0          0         0
## 37             0        0         0            0          0         0
##    D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free
## 11           0        0       0       0        0          0        0
## 37           0        0       0       0        0          0        0
##    D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal
## 11        0        0        0        0              0          0
## 37        0        0        0        0              0          0
##    D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail  D.T.bodi
## 11          0         0        0         0       0          0 0.7459689
## 37          0         0        0         0       0          0 0.0000000
##    D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side
## 11          0         0         0           0              0        0
## 37          0         0         0           0              0        0
##    D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili
## 11           0        0        0       0           0           0
## 37           0        0        0       0           0           0
##    D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset D.T.upper
## 11            0       0        0         0           0         0
## 37            0       0        0         0           0         0
##    D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag D.T.qualiti
## 11           0          0        0           0        0           0
## 37           0          0        0           0        0           0
##    D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur D.T.technician
## 11         0       0         0           0          0              0
## 37         0       0         0           0          0              0
##    D.T.super D.T.expect D.T.sync D.T.speaker D.T.name D.T.lightn D.T.X2016
## 11         0          0        0           0        0          0         0
## 37         0          0        0           0        0          0         0
##    D.T.passcod D.T.money D.T.els D.T.stylus D.T.corpor D.T.intro
## 11           0         0       0          0          0         0
## 37           0         0       0          0          0         0
##    D.T.higher D.T.beetl D.T.defens D.T.disclaim D.T.essenti D.T.final
## 11          0         0          0            0           0         0
## 37          0         0          0            0           0         0
##    D.T.repeat. D.P.http D.P.mini D.P.air D.P.black D.P.white D.P.gold
## 11           0        0        0       0         0         0        0
## 37           0        0        0       0         0         0        0
##    D.P.spacegray
## 11             0
## 37             0
## [1] "min distance(-0.0000) pair:"
##     UniqueID sold.fctr prdl.my.descr.fctr
## 17     10017         Y          iPad 3+#1
## 146    10146         N          iPad 3+#1
##                                                                                                  descr.my
## 17   Great working iPad.  Very minor surface scratches on back as pictured.  Other very light scratching 
## 146 Great working iPad.  Minor surface scratches on back as pictured.  Other very light scratching which 
##     D.T.condit D.T.use D.T.scratch D.T.new D.T.good D.T.screen D.T.great
## 17           0       0   0.6431422       0        0          0 0.4409798
## 146          0       0   0.6431422       0        0          0 0.4409798
##      D.T.ipad  D.T.work D.T.excel D.T.like D.T.box D.T.function. D.T.item
## 17  0.3517602 0.3746226         0        0       0             0        0
## 146 0.3517602 0.3746226         0        0       0             0        0
##     D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect
## 17          0          0 0.4505218        0         0        0           0
## 146         0          0 0.4505218        0         0        0           0
##     D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light D.T.devic
## 17           0        0        0          0       0 0.5035733         0
## 146          0        0        0          0       0 0.5035733         0
##     D.T.pleas  D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 17          0 0.5108796          0        0        0        0        0
## 146         0 0.5108796          0        0        0        0        0
##     D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come
## 17          0        0        0           0         0        0        0
## 146         0        0        0           0         0        0        0
##     D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 17          0          0         0          0            0        0
## 146         0          0         0          0            0        0
##     D.T.refurbish D.T.show D.T.shape D.T.read D.T.test D.T.pictur D.T.bare
## 17              0        0         0        0        0  0.6331188        0
## 146             0        0         0        0        0  0.6331188        0
##     D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini
## 17          0        0       0        0           0          0        0
## 146         0        0       0        0           0          0        0
##     D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding D.T.near
## 17           0          0       0       0             0        0        0
## 146          0          0       0       0             0        0        0
##     D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi D.T.order
## 17          0         0        0           0         0        0         0
## 146         0         0        0           0         0        0         0
##     D.T.protector D.T.kept D.T.right D.T.previous D.T.button D.T.alway
## 17              0        0         0            0          0         0
## 146             0        0         0            0          0         0
##     D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free
## 17            0        0       0       0        0          0        0
## 146           0        0       0       0        0          0        0
##     D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal
## 17         0        0        0        0              0          0
## 146        0        0        0        0              0          0
##     D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi
## 17           0         0        0         0       0          0        0
## 146          0         0        0         0       0          0        0
##     D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side
## 17           0         0         0           0              0        0
## 146          0         0         0           0              0        0
##     D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili
## 17            0        0        0       0           0           0
## 146           0        0        0       0           0           0
##     D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset D.T.upper
## 17             0       0        0         0           0         0
## 146            0       0        0         0           0         0
##     D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag D.T.qualiti
## 17            0          0        0           0        0           0
## 146           0          0        0           0        0           0
##     D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur D.T.technician
## 17          0       0         0           0          0              0
## 146         0       0         0           0          0              0
##     D.T.super D.T.expect D.T.sync D.T.speaker D.T.name D.T.lightn
## 17          0          0        0           0        0          0
## 146         0          0        0           0        0          0
##     D.T.X2016 D.T.passcod D.T.money D.T.els D.T.stylus D.T.corpor
## 17          0           0         0       0          0          0
## 146         0           0         0       0          0          0
##     D.T.intro D.T.higher D.T.beetl D.T.defens D.T.disclaim D.T.essenti
## 17          0          0         0          0            0           0
## 146         0          0         0          0            0           0
##     D.T.final D.T.repeat. D.P.http D.P.mini D.P.air D.P.black D.P.white
## 17          0           0        0        0       0         0         0
## 146         0           0        0        0       0         0         0
##     D.P.gold D.P.spacegray
## 17         0             0
## 146        0             0

## [1] "Category: iPadAir#0"
## [1] "max distance(0.0000) pair:"
##      UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 16      10016         N          iPadAir#0                   0       0
## 16.1    10016         N          iPadAir#0                   0       0
##      D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 16             0       0        0          0         0        0        0
## 16.1           0       0        0          0         0        0        0
##      D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli
## 16           0        0       0             0        0         0
## 16.1         0        0       0             0        0         0
##      D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect
## 16            0         0        0         0        0           0
## 16.1          0         0        0         0        0           0
##      D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light D.T.devic
## 16            0        0        0          0       0         0         0
## 16.1          0        0        0          0       0         0         0
##      D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 16           0        0          0        0        0        0        0
## 16.1         0        0          0        0        0        0        0
##      D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come
## 16           0        0        0           0         0        0        0
## 16.1         0        0        0           0         0        0        0
##      D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 16           0          0         0          0            0        0
## 16.1         0          0         0          0            0        0
##      D.T.refurbish D.T.show D.T.shape D.T.read D.T.test D.T.pictur
## 16               0        0         0        0        0          0
## 16.1             0        0         0        0        0          0
##      D.T.bare D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag
## 16          0         0        0       0        0           0          0
## 16.1        0         0        0       0        0           0          0
##      D.T.mini D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding
## 16          0          0          0       0       0             0        0
## 16.1        0          0          0       0       0             0        0
##      D.T.near D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi
## 16          0         0         0        0           0         0        0
## 16.1        0         0         0        0           0         0        0
##      D.T.order D.T.protector D.T.kept D.T.right D.T.previous D.T.button
## 16           0             0        0         0            0          0
## 16.1         0             0        0         0            0          0
##      D.T.alway D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag
## 16           0           0        0       0       0        0          0
## 16.1         0           0        0       0       0        0          0
##      D.T.free D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession
## 16          0        0        0        0        0              0
## 16.1        0        0        0        0        0              0
##      D.T.overal D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail
## 16            0          0         0        0         0       0          0
## 16.1          0          0         0        0         0       0          0
##      D.T.bodi D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur
## 16          0          0         0         0           0              0
## 16.1        0          0         0         0           0              0
##      D.T.side D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect
## 16          0           0        0        0       0           0
## 16.1        0           0        0        0       0           0
##      D.T.heavili D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset
## 16             0            0       0        0         0           0
## 16.1           0            0       0        0         0           0
##      D.T.upper D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag
## 16           0           0          0        0           0        0
## 16.1         0           0          0        0           0        0
##      D.T.qualiti D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur
## 16             0         0       0         0           0          0
## 16.1           0         0       0         0           0          0
##      D.T.technician D.T.super D.T.expect D.T.sync D.T.speaker D.T.name
## 16                0         0          0        0           0        0
## 16.1              0         0          0        0           0        0
##      D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els D.T.stylus
## 16            0         0           0         0       0          0
## 16.1          0         0           0         0       0          0
##      D.T.corpor D.T.intro D.T.higher D.T.beetl D.T.defens D.T.disclaim
## 16            0         0          0         0          0            0
## 16.1          0         0          0         0          0            0
##      D.T.essenti D.T.final D.T.repeat. D.P.http D.P.mini D.P.air D.P.black
## 16             0         0           0        0        0       0         0
## 16.1           0         0           0        0        0       0         0
##      D.P.white D.P.gold D.P.spacegray
## 16           0        0             0
## 16.1         0        0             0
## [1] "min distance(0.0000) pair:"
##    UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 16    10016         N          iPadAir#0                   0       0
## 19    10019         Y          iPadAir#0                   0       0
##    D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 16           0       0        0          0         0        0        0
## 19           0       0        0          0         0        0        0
##    D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli D.T.cosmet
## 16         0        0       0             0        0         0          0
## 19         0        0       0             0        0         0          0
##    D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock
## 16         0        0         0        0           0          0        0
## 19         0        0         0        0           0          0        0
##    D.T.case D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back
## 16        0          0       0         0         0         0        0
## 19        0          0       0         0         0         0        0
##    D.T.origin D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will
## 16          0        0        0        0        0         0        0
## 19          0        0        0        0        0         0        0
##    D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 16        0           0         0        0        0         0          0
## 19        0           0         0        0        0         0          0
##    D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 16         0          0            0        0             0        0
## 19         0          0            0        0             0        0
##    D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 16         0        0        0          0        0         0        0
## 19         0        0        0          0        0         0        0
##    D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect D.T.normal
## 16       0        0           0          0        0          0          0
## 19       0        0           0          0        0          0          0
##    D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo
## 16       0       0             0        0        0         0         0
## 19       0       0             0        0        0         0         0
##    D.T.tear D.T.display D.T.minim D.T.wifi D.T.order D.T.protector
## 16        0           0         0        0         0             0
## 19        0           0         0        0         0             0
##    D.T.kept D.T.right D.T.previous D.T.button D.T.alway D.T.contact
## 16        0         0            0          0         0           0
## 19        0         0            0          0         0           0
##    D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc D.T.imei
## 16        0       0       0        0          0        0        0        0
## 19        0       0       0        0          0        0        0        0
##    D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail D.T.refer
## 16        0        0              0          0          0         0
## 19        0        0              0          0          0         0
##    D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller D.T.activ
## 16        0         0       0          0        0          0         0
## 19        0         0       0          0        0          0         0
##    D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi D.T.ship
## 16         0           0              0        0           0        0
## 19         0           0              0        0           0        0
##    D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non D.T.geek
## 16        0       0           0           0            0       0        0
## 19        0       0           0           0            0       0        0
##    D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll D.T.must
## 16         0           0         0           0          0        0
## 19         0           0         0           0          0        0
##    D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 16           0        0           0         0       0         0
## 19           0        0           0         0       0         0
##    D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 16           0          0              0         0          0        0
## 19           0          0              0         0          0        0
##    D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els
## 16           0        0          0         0           0         0       0
## 19           0        0          0         0           0         0       0
##    D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl D.T.defens
## 16          0          0         0          0         0          0
## 19          0          0         0          0         0          0
##    D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http D.P.mini
## 16            0           0         0           0        0        0
## 19            0           0         0           0        0        0
##    D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 16       0         0         0        0             0
## 19       0         0         0        0             0

## [1] "Category: iPadAir#1"
## [1] "max distance(1.0000) pair:"
##    UniqueID sold.fctr prdl.my.descr.fctr
## 30    10030         Y          iPadAir#1
## 33    10033         N          iPadAir#1
##                                                                                                descr.my
## 30                              Comes with USB Cable and wall adapter.  May have minor dings or scuffs.
## 33 We are selling good quality iPads that have been fully tested by an Apple Certified Technician. The 
##    D.T.condit D.T.use D.T.scratch D.T.new D.T.good D.T.screen D.T.great
## 30          0       0           0       0 0.000000          0         0
## 33          0       0           0       0 0.417059          0         0
##     D.T.ipad D.T.work D.T.excel D.T.like D.T.box D.T.function. D.T.item
## 30 0.0000000        0         0        0       0             0        0
## 33 0.3908446        0         0        0       0             0        0
##    D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect
## 30  0.000000          0 0.5005798        0         0        0           0
## 33  0.546239          0 0.0000000        0         0        0           0
##    D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light D.T.devic
## 30          0        0        0          0       0         0         0
## 33          0        0        0          0       0         0         0
##    D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 30         0        0          0        0        0        0        0
## 33         0        0          0        0        0        0        0
##    D.T.clean D.T.will  D.T.appl D.T.charger D.T.damag D.T.X100  D.T.come
## 30         0        0 0.0000000           0         0        0 0.6215803
## 33         0        0 0.6103266           0         0        0 0.0000000
##    D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 30 0.6648072          0         0          0            0        0
## 33 0.0000000          0         0          0            0        0
##    D.T.refurbish D.T.show D.T.shape D.T.read  D.T.test D.T.pictur D.T.bare
## 30             0        0         0        0 0.0000000          0        0
## 33             0        0         0        0 0.6851255          0        0
##    D.T.brand D.T.list   D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini
## 30         0        0 0.6808506        0           0          0        0
## 33         0        0 0.0000000        0           0          0        0
##    D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori  D.T.ding D.T.near
## 30          0          0       0       0             0 0.7479696        0
## 33          0          0       0       0             0 0.0000000        0
##    D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi D.T.order
## 30         0         0        0           0         0        0         0
## 33         0         0        0           0         0        0         0
##    D.T.protector D.T.kept D.T.right D.T.previous D.T.button D.T.alway
## 30             0        0         0            0          0         0
## 33             0        0         0            0          0         0
##    D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free
## 30           0        0       0       0        0          0        0
## 33           0        0       0       0        0          0        0
##    D.T.sinc D.T.imei  D.T.cabl D.T.seal D.T.profession D.T.overal
## 30        0        0 0.8006286        0              0          0
## 33        0        0 0.0000000        0              0          0
##    D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi
## 30          0         0        0         0       0          0        0
## 33          0         0        0         0       0          0        0
##    D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side
## 30          0         0         0           0              0        0
## 33          0         0         0           0              0        0
##    D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili
## 30   0.0000000        0        0       0           0           0
## 33   0.8948505        0        0       0           0           0
##    D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset D.T.upper
## 30            0       0        0         0           0         0
## 33            0       0        0         0           0         0
##    D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag D.T.qualiti
## 30           0          0        0           0        0   0.0000000
## 33           0          0        0           0        0   0.9767356
##    D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur D.T.technician
## 30         0       0         0           0          0       0.000000
## 33         0       0         0           0          0       1.005962
##    D.T.super D.T.expect D.T.sync D.T.speaker D.T.name D.T.lightn D.T.X2016
## 30         0          0        0           0        0          0         0
## 33         0          0        0           0        0          0         0
##    D.T.passcod D.T.money D.T.els D.T.stylus D.T.corpor D.T.intro
## 30           0         0       0          0          0         0
## 33           0         0       0          0          0         0
##    D.T.higher D.T.beetl D.T.defens D.T.disclaim D.T.essenti D.T.final
## 30          0         0          0            0           0         0
## 33          0         0          0            0           0         0
##    D.T.repeat. D.P.http D.P.mini D.P.air D.P.black D.P.white D.P.gold
## 30           0        0        0       0         0         0        0
## 33           0        0        0       0         0         0        0
##    D.P.spacegray
## 30             0
## 33             0
## [1] "min distance(-0.0000) pair:"
##      UniqueID sold.fctr prdl.my.descr.fctr        descr.my D.T.condit
## 1322    11323         Y          iPadAir#1 Great Condition   1.206343
## 2337    12339      <NA>          iPadAir#1 great condition   1.206343
##      D.T.use D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad
## 1322       0           0       0        0          0  2.204899        0
## 2337       0           0       0        0          0  2.204899        0
##      D.T.work D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli
## 1322        0         0        0       0             0        0         0
## 2337        0         0        0       0             0        0         0
##      D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect
## 1322          0         0        0         0        0           0
## 2337          0         0        0         0        0           0
##      D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light D.T.devic
## 1322          0        0        0          0       0         0         0
## 2337          0        0        0          0       0         0         0
##      D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 1322         0        0          0        0        0        0        0
## 2337         0        0          0        0        0        0        0
##      D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come
## 1322         0        0        0           0         0        0        0
## 2337         0        0        0           0         0        0        0
##      D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 1322         0          0         0          0            0        0
## 2337         0          0         0          0            0        0
##      D.T.refurbish D.T.show D.T.shape D.T.read D.T.test D.T.pictur
## 1322             0        0         0        0        0          0
## 2337             0        0         0        0        0          0
##      D.T.bare D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag
## 1322        0         0        0       0        0           0          0
## 2337        0         0        0       0        0           0          0
##      D.T.mini D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding
## 1322        0          0          0       0       0             0        0
## 2337        0          0          0       0       0             0        0
##      D.T.near D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi
## 1322        0         0         0        0           0         0        0
## 2337        0         0         0        0           0         0        0
##      D.T.order D.T.protector D.T.kept D.T.right D.T.previous D.T.button
## 1322         0             0        0         0            0          0
## 2337         0             0        0         0            0          0
##      D.T.alway D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag
## 1322         0           0        0       0       0        0          0
## 2337         0           0        0       0       0        0          0
##      D.T.free D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession
## 1322        0        0        0        0        0              0
## 2337        0        0        0        0        0              0
##      D.T.overal D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail
## 1322          0          0         0        0         0       0          0
## 2337          0          0         0        0         0       0          0
##      D.T.bodi D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur
## 1322        0          0         0         0           0              0
## 2337        0          0         0         0           0              0
##      D.T.side D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect
## 1322        0           0        0        0       0           0
## 2337        0           0        0        0       0           0
##      D.T.heavili D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset
## 1322           0            0       0        0         0           0
## 2337           0            0       0        0         0           0
##      D.T.upper D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag
## 1322         0           0          0        0           0        0
## 2337         0           0          0        0           0        0
##      D.T.qualiti D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur
## 1322           0         0       0         0           0          0
## 2337           0         0       0         0           0          0
##      D.T.technician D.T.super D.T.expect D.T.sync D.T.speaker D.T.name
## 1322              0         0          0        0           0        0
## 2337              0         0          0        0           0        0
##      D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els D.T.stylus
## 1322          0         0           0         0       0          0
## 2337          0         0           0         0       0          0
##      D.T.corpor D.T.intro D.T.higher D.T.beetl D.T.defens D.T.disclaim
## 1322          0         0          0         0          0            0
## 2337          0         0          0         0          0            0
##      D.T.essenti D.T.final D.T.repeat. D.P.http D.P.mini D.P.air D.P.black
## 1322           0         0           0        0        0       0         0
## 2337           0         0           0        0        0       0         0
##      D.P.white D.P.gold D.P.spacegray
## 1322         0        0             0
## 2337         0        0             0

## [1] "Category: iPadmini 2+#0"
## [1] "max distance(0.0000) pair:"
##     UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 4      10004         N      iPadmini 2+#0                   0       0
## 4.1    10004         N      iPadmini 2+#0                   0       0
##     D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 4             0       0        0          0         0        0        0
## 4.1           0       0        0          0         0        0        0
##     D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli D.T.cosmet
## 4           0        0       0             0        0         0          0
## 4.1         0        0       0             0        0         0          0
##     D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock
## 4           0        0         0        0           0          0        0
## 4.1         0        0         0        0           0          0        0
##     D.T.case D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back
## 4          0          0       0         0         0         0        0
## 4.1        0          0       0         0         0         0        0
##     D.T.origin D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will
## 4            0        0        0        0        0         0        0
## 4.1          0        0        0        0        0         0        0
##     D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 4          0           0         0        0        0         0          0
## 4.1        0           0         0        0        0         0          0
##     D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 4           0          0            0        0             0        0
## 4.1         0          0            0        0             0        0
##     D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 4           0        0        0          0        0         0        0
## 4.1         0        0        0          0        0         0        0
##     D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect D.T.normal
## 4         0        0           0          0        0          0          0
## 4.1       0        0           0          0        0          0          0
##     D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo
## 4         0       0             0        0        0         0         0
## 4.1       0       0             0        0        0         0         0
##     D.T.tear D.T.display D.T.minim D.T.wifi D.T.order D.T.protector
## 4          0           0         0        0         0             0
## 4.1        0           0         0        0         0             0
##     D.T.kept D.T.right D.T.previous D.T.button D.T.alway D.T.contact
## 4          0         0            0          0         0           0
## 4.1        0         0            0          0         0           0
##     D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc
## 4          0       0       0        0          0        0        0
## 4.1        0       0       0        0          0        0        0
##     D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail
## 4          0        0        0              0          0          0
## 4.1        0        0        0              0          0          0
##     D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller
## 4           0        0         0       0          0        0          0
## 4.1         0        0         0       0          0        0          0
##     D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi
## 4           0         0           0              0        0           0
## 4.1         0         0           0              0        0           0
##     D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non
## 4          0        0       0           0           0            0       0
## 4.1        0        0       0           0           0            0       0
##     D.T.geek D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll
## 4          0         0           0         0           0          0
## 4.1        0         0           0         0           0          0
##     D.T.must D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 4          0           0        0           0         0       0         0
## 4.1        0           0        0           0         0       0         0
##     D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 4             0          0              0         0          0        0
## 4.1           0          0              0         0          0        0
##     D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money
## 4             0        0          0         0           0         0
## 4.1           0        0          0         0           0         0
##     D.T.els D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl
## 4         0          0          0         0          0         0
## 4.1       0          0          0         0          0         0
##     D.T.defens D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http
## 4            0            0           0         0           0        0
## 4.1          0            0           0         0           0        0
##     D.P.mini D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 4          0       0         0         0        0             0
## 4.1        0       0         0         0        0             0
## [1] "min distance(0.0000) pair:"
##   UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 4    10004         N      iPadmini 2+#0                   0       0
## 6    10006         Y      iPadmini 2+#0                   0       0
##   D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 4           0       0        0          0         0        0        0
## 6           0       0        0          0         0        0        0
##   D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli D.T.cosmet
## 4         0        0       0             0        0         0          0
## 6         0        0       0             0        0         0          0
##   D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock
## 4         0        0         0        0           0          0        0
## 6         0        0         0        0           0          0        0
##   D.T.case D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back
## 4        0          0       0         0         0         0        0
## 6        0          0       0         0         0         0        0
##   D.T.origin D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will
## 4          0        0        0        0        0         0        0
## 6          0        0        0        0        0         0        0
##   D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 4        0           0         0        0        0         0          0
## 6        0           0         0        0        0         0          0
##   D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 4         0          0            0        0             0        0
## 6         0          0            0        0             0        0
##   D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 4         0        0        0          0        0         0        0
## 6         0        0        0          0        0         0        0
##   D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect D.T.normal
## 4       0        0           0          0        0          0          0
## 6       0        0           0          0        0          0          0
##   D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo
## 4       0       0             0        0        0         0         0
## 6       0       0             0        0        0         0         0
##   D.T.tear D.T.display D.T.minim D.T.wifi D.T.order D.T.protector D.T.kept
## 4        0           0         0        0         0             0        0
## 6        0           0         0        0         0             0        0
##   D.T.right D.T.previous D.T.button D.T.alway D.T.contact D.T.fair D.T.air
## 4         0            0          0         0           0        0       0
## 6         0            0          0         0           0        0       0
##   D.T.esn D.T.full D.T.averag D.T.free D.T.sinc D.T.imei D.T.cabl D.T.seal
## 4       0        0          0        0        0        0        0        0
## 6       0        0          0        0        0        0        0        0
##   D.T.profession D.T.overal D.T.retail D.T.refer D.T.left D.T.stock
## 4              0          0          0         0        0         0
## 6              0          0          0         0        0         0
##   D.T.two D.T.detail D.T.bodi D.T.seller D.T.activ D.T.phone D.T.problem
## 4       0          0        0          0         0         0           0
## 6       0          0        0          0         0         0           0
##   D.T.manufactur D.T.side D.T.certifi D.T.ship D.T.chip D.T.edg
## 4              0        0           0        0        0       0
## 6              0        0           0        0        0       0
##   D.T.inspect D.T.heavili D.T.keyboard D.T.non D.T.geek D.T.squad
## 4           0           0            0       0        0         0
## 6           0           0            0       0        0         0
##   D.T.handset D.T.upper D.T.sticker D.T.scroll D.T.must D.T.contain
## 4           0         0           0          0        0           0
## 6           0         0           0          0        0           0
##   D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur
## 4        0           0         0       0         0           0          0
## 6        0           0         0       0         0           0          0
##   D.T.technician D.T.super D.T.expect D.T.sync D.T.speaker D.T.name
## 4              0         0          0        0           0        0
## 6              0         0          0        0           0        0
##   D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els D.T.stylus D.T.corpor
## 4          0         0           0         0       0          0          0
## 6          0         0           0         0       0          0          0
##   D.T.intro D.T.higher D.T.beetl D.T.defens D.T.disclaim D.T.essenti
## 4         0          0         0          0            0           0
## 6         0          0         0          0            0           0
##   D.T.final D.T.repeat. D.P.http D.P.mini D.P.air D.P.black D.P.white
## 4         0           0        0        0       0         0         0
## 6         0           0        0        0       0         0         0
##   D.P.gold D.P.spacegray
## 4        0             0
## 6        0             0

## [1] "Category: iPadmini 2+#1"
## [1] "max distance(1.0000) pair:"
##     UniqueID sold.fctr prdl.my.descr.fctr
## 18     10018         N      iPadmini 2+#1
## 101    10101         Y      iPadmini 2+#1
##                                                                                                 descr.my
## 18  We are selling good quality iPads that have been fully tested by an Apple Certified Technician. The 
## 101  This item is in Excellent cosmetic condition. It will not have any scratches on the screen. It may 
##     D.T.condit D.T.use D.T.scratch D.T.new D.T.good D.T.screen D.T.great
## 18   0.0000000       0   0.0000000       0 0.417059  0.0000000         0
## 101  0.3015858       0   0.4019639       0 0.000000  0.4551091         0
##      D.T.ipad D.T.work D.T.excel D.T.like D.T.box D.T.function.  D.T.item
## 18  0.3908446        0 0.0000000        0       0             0 0.0000000
## 101 0.0000000        0 0.5914658        0       0             0 0.5455444
##     D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect
## 18   0.546239  0.0000000         0        0         0        0           0
## 101  0.000000  0.5631522         0        0         0        0           0
##     D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light D.T.devic
## 18           0        0        0          0       0         0         0
## 101          0        0        0          0       0         0         0
##     D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 18          0        0          0        0        0        0        0
## 101         0        0          0        0        0        0        0
##     D.T.clean  D.T.will  D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come
## 18          0 0.0000000 0.6103266           0         0        0        0
## 101         0 0.7612725 0.0000000           0         0        0        0
##     D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 18          0          0         0          0            0        0
## 101         0          0         0          0            0        0
##     D.T.refurbish D.T.show D.T.shape D.T.read  D.T.test D.T.pictur
## 18              0        0         0        0 0.6851255          0
## 101             0        0         0        0 0.0000000          0
##     D.T.bare D.T.brand D.T.list   D.T.may D.T.mark D.T.blemish D.T.packag
## 18         0         0        0 0.0000000        0           0          0
## 101        0         0        0 0.7659569        0           0          0
##     D.T.mini D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding
## 18         0          0          0       0       0             0        0
## 101        0          0          0       0       0             0        0
##     D.T.near D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi
## 18         0         0         0        0           0         0        0
## 101        0         0         0        0           0         0        0
##     D.T.order D.T.protector D.T.kept D.T.right D.T.previous D.T.button
## 18          0             0        0         0            0          0
## 101         0             0        0         0            0          0
##     D.T.alway D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag
## 18          0           0        0       0       0        0          0
## 101         0           0        0       0       0        0          0
##     D.T.free D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal
## 18         0        0        0        0        0              0          0
## 101        0        0        0        0        0              0          0
##     D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi
## 18           0         0        0         0       0          0        0
## 101          0         0        0         0       0          0        0
##     D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side
## 18           0         0         0           0              0        0
## 101          0         0         0           0              0        0
##     D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili
## 18    0.8948505        0        0       0           0           0
## 101   0.0000000        0        0       0           0           0
##     D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset D.T.upper
## 18             0       0        0         0           0         0
## 101            0       0        0         0           0         0
##     D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag D.T.qualiti
## 18            0          0        0           0        0   0.9767356
## 101           0          0        0           0        0   0.0000000
##     D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur D.T.technician
## 18          0       0         0           0          0       1.005962
## 101         0       0         0           0          0       0.000000
##     D.T.super D.T.expect D.T.sync D.T.speaker D.T.name D.T.lightn
## 18          0          0        0           0        0          0
## 101         0          0        0           0        0          0
##     D.T.X2016 D.T.passcod D.T.money D.T.els D.T.stylus D.T.corpor
## 18          0           0         0       0          0          0
## 101         0           0         0       0          0          0
##     D.T.intro D.T.higher D.T.beetl D.T.defens D.T.disclaim D.T.essenti
## 18          0          0         0          0            0           0
## 101         0          0         0          0            0           0
##     D.T.final D.T.repeat. D.P.http D.P.mini D.P.air D.P.black D.P.white
## 18          0           0        0        0       0         0         0
## 101         0           0        0        0       0         0         0
##     D.P.gold D.P.spacegray
## 18         0             0
## 101        0             0
## [1] "min distance(0.0000) pair:"
##      UniqueID sold.fctr prdl.my.descr.fctr                        descr.my
## 2152    12154      <NA>      iPadmini 2+#1      Only Opened box but is new
## 2443    12445      <NA>      iPadmini 2+#1 New Opened Box powered up only.
##      D.T.condit D.T.use D.T.scratch  D.T.new D.T.good D.T.screen D.T.great
## 2152          0       0           0 1.363393        0          0         0
## 2443          0       0           0 1.022545        0          0         0
##      D.T.ipad D.T.work D.T.excel D.T.like  D.T.box D.T.function. D.T.item
## 2152        0        0         0        0 1.531408             0        0
## 2443        0        0         0        0 1.148556             0        0
##      D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear
## 2152         0          0         0        0         0        0
## 2443         0          0         0        0         0        0
##      D.T.perfect D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light
## 2152           0          0        0        0          0       0         0
## 2443           0          0        0        0          0       0         0
##      D.T.devic D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign
## 2152         0         0        0          0        0        0        0
## 2443         0         0        0          0        0        0        0
##      D.T.open D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100
## 2152 1.972050         0        0        0           0         0        0
## 2443 1.479038         0        0        0           0         0        0
##      D.T.come D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript
## 2152        0         0          0         0          0            0
## 2443        0         0          0         0          0            0
##      D.T.unit D.T.refurbish D.T.show D.T.shape D.T.read D.T.test
## 2152        0             0        0         0        0        0
## 2443        0             0        0         0        0        0
##      D.T.pictur D.T.bare D.T.brand D.T.list D.T.may D.T.mark D.T.blemish
## 2152          0        0         0        0       0        0           0
## 2443          0        0         0        0       0        0           0
##      D.T.packag D.T.mini D.T.affect D.T.normal D.T.tab D.T.top
## 2152          0        0          0          0       0       0
## 2443          0        0          0          0       0       0
##      D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo D.T.tear
## 2152             0        0        0         0         0        0
## 2443             0        0        0         0         0        0
##      D.T.display D.T.minim D.T.wifi D.T.order D.T.protector D.T.kept
## 2152           0         0        0         0             0        0
## 2443           0         0        0         0             0        0
##      D.T.right D.T.previous D.T.button D.T.alway D.T.contact D.T.fair
## 2152         0            0          0         0           0        0
## 2443         0            0          0         0           0        0
##      D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc D.T.imei
## 2152       0       0        0          0        0        0        0
## 2443       0       0        0          0        0        0        0
##      D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail D.T.refer
## 2152        0        0              0          0          0         0
## 2443        0        0              0          0          0         0
##      D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller D.T.activ
## 2152        0         0       0          0        0          0         0
## 2443        0         0       0          0        0          0         0
##      D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi D.T.ship
## 2152         0           0              0        0           0        0
## 2443         0           0              0        0           0        0
##      D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non
## 2152        0       0           0           0            0       0
## 2443        0       0           0           0            0       0
##      D.T.geek D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll
## 2152        0         0           0         0           0          0
## 2443        0         0           0         0           0          0
##      D.T.must D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 2152        0           0        0           0         0       0         0
## 2443        0           0        0           0         0       0         0
##      D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 2152           0          0              0         0          0        0
## 2443           0          0              0         0          0        0
##      D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money
## 2152           0        0          0         0           0         0
## 2443           0        0          0         0           0         0
##      D.T.els D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl
## 2152       0          0          0         0          0         0
## 2443       0          0          0         0          0         0
##      D.T.defens D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http
## 2152          0            0           0         0           0        0
## 2443          0            0           0         0           0        0
##      D.P.mini D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 2152        0       0         0         0        0             0
## 2443        0       0         0         0        0             0

## [1] "Category: iPadmini#0"
## [1] "max distance(0.0000) pair:"
##     UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 7      10007         Y         iPadmini#0                   0       0
## 7.1    10007         Y         iPadmini#0                   0       0
##     D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 7             0       0        0          0         0        0        0
## 7.1           0       0        0          0         0        0        0
##     D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli D.T.cosmet
## 7           0        0       0             0        0         0          0
## 7.1         0        0       0             0        0         0          0
##     D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock
## 7           0        0         0        0           0          0        0
## 7.1         0        0         0        0           0          0        0
##     D.T.case D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back
## 7          0          0       0         0         0         0        0
## 7.1        0          0       0         0         0         0        0
##     D.T.origin D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will
## 7            0        0        0        0        0         0        0
## 7.1          0        0        0        0        0         0        0
##     D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 7          0           0         0        0        0         0          0
## 7.1        0           0         0        0        0         0          0
##     D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 7           0          0            0        0             0        0
## 7.1         0          0            0        0             0        0
##     D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 7           0        0        0          0        0         0        0
## 7.1         0        0        0          0        0         0        0
##     D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect D.T.normal
## 7         0        0           0          0        0          0          0
## 7.1       0        0           0          0        0          0          0
##     D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo
## 7         0       0             0        0        0         0         0
## 7.1       0       0             0        0        0         0         0
##     D.T.tear D.T.display D.T.minim D.T.wifi D.T.order D.T.protector
## 7          0           0         0        0         0             0
## 7.1        0           0         0        0         0             0
##     D.T.kept D.T.right D.T.previous D.T.button D.T.alway D.T.contact
## 7          0         0            0          0         0           0
## 7.1        0         0            0          0         0           0
##     D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc
## 7          0       0       0        0          0        0        0
## 7.1        0       0       0        0          0        0        0
##     D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail
## 7          0        0        0              0          0          0
## 7.1        0        0        0              0          0          0
##     D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller
## 7           0        0         0       0          0        0          0
## 7.1         0        0         0       0          0        0          0
##     D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi
## 7           0         0           0              0        0           0
## 7.1         0         0           0              0        0           0
##     D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non
## 7          0        0       0           0           0            0       0
## 7.1        0        0       0           0           0            0       0
##     D.T.geek D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll
## 7          0         0           0         0           0          0
## 7.1        0         0           0         0           0          0
##     D.T.must D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 7          0           0        0           0         0       0         0
## 7.1        0           0        0           0         0       0         0
##     D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 7             0          0              0         0          0        0
## 7.1           0          0              0         0          0        0
##     D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money
## 7             0        0          0         0           0         0
## 7.1           0        0          0         0           0         0
##     D.T.els D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl
## 7         0          0          0         0          0         0
## 7.1       0          0          0         0          0         0
##     D.T.defens D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http
## 7            0            0           0         0           0        0
## 7.1          0            0           0         0           0        0
##     D.P.mini D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 7          0       0         0         0        0             0
## 7.1        0       0         0         0        0             0
## [1] "min distance(0.0000) pair:"
##    UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 7     10007         Y         iPadmini#0                   0       0
## 57    10057         N         iPadmini#0                   0       0
##    D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 7            0       0        0          0         0        0        0
## 57           0       0        0          0         0        0        0
##    D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli D.T.cosmet
## 7          0        0       0             0        0         0          0
## 57         0        0       0             0        0         0          0
##    D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock
## 7          0        0         0        0           0          0        0
## 57         0        0         0        0           0          0        0
##    D.T.case D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back
## 7         0          0       0         0         0         0        0
## 57        0          0       0         0         0         0        0
##    D.T.origin D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will
## 7           0        0        0        0        0         0        0
## 57          0        0        0        0        0         0        0
##    D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 7         0           0         0        0        0         0          0
## 57        0           0         0        0        0         0          0
##    D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 7          0          0            0        0             0        0
## 57         0          0            0        0             0        0
##    D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 7          0        0        0          0        0         0        0
## 57         0        0        0          0        0         0        0
##    D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect D.T.normal
## 7        0        0           0          0        0          0          0
## 57       0        0           0          0        0          0          0
##    D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo
## 7        0       0             0        0        0         0         0
## 57       0       0             0        0        0         0         0
##    D.T.tear D.T.display D.T.minim D.T.wifi D.T.order D.T.protector
## 7         0           0         0        0         0             0
## 57        0           0         0        0         0             0
##    D.T.kept D.T.right D.T.previous D.T.button D.T.alway D.T.contact
## 7         0         0            0          0         0           0
## 57        0         0            0          0         0           0
##    D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc D.T.imei
## 7         0       0       0        0          0        0        0        0
## 57        0       0       0        0          0        0        0        0
##    D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail D.T.refer
## 7         0        0              0          0          0         0
## 57        0        0              0          0          0         0
##    D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller D.T.activ
## 7         0         0       0          0        0          0         0
## 57        0         0       0          0        0          0         0
##    D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi D.T.ship
## 7          0           0              0        0           0        0
## 57         0           0              0        0           0        0
##    D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non D.T.geek
## 7         0       0           0           0            0       0        0
## 57        0       0           0           0            0       0        0
##    D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll D.T.must
## 7          0           0         0           0          0        0
## 57         0           0         0           0          0        0
##    D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 7            0        0           0         0       0         0
## 57           0        0           0         0       0         0
##    D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 7            0          0              0         0          0        0
## 57           0          0              0         0          0        0
##    D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els
## 7            0        0          0         0           0         0       0
## 57           0        0          0         0           0         0       0
##    D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl D.T.defens
## 7           0          0         0          0         0          0
## 57          0          0         0          0         0          0
##    D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http D.P.mini
## 7             0           0         0           0        0        0
## 57            0           0         0           0        0        0
##    D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 7        0         0         0        0             0
## 57       0         0         0        0             0

## [1] "Category: iPadmini#1"
## [1] "max distance(1.0000) pair:"
##    UniqueID sold.fctr prdl.my.descr.fctr
## 60    10060         N         iPadmini#1
## 76    10076         Y         iPadmini#1
##                                                                                        descr.my
## 60                       Minor scuffs in the back. Otherwise looks flawless.  See all pictures.
## 76 Works perfectly, NOT iCloud locked, 1 owner. It is in not in very good condition, but works 
##    D.T.condit D.T.use D.T.scratch D.T.new  D.T.good D.T.screen D.T.great
## 60  0.0000000       0           0       0 0.0000000          0         0
## 76  0.3015858       0           0       0 0.4691913          0         0
##    D.T.ipad  D.T.work D.T.excel D.T.like D.T.box D.T.function. D.T.item
## 60        0 0.0000000         0        0       0             0        0
## 76        0 0.9365565         0        0       0             0        0
##    D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect
## 60         0          0 0.5631522        0         0        0   0.0000000
## 76         0          0 0.0000000        0         0        0   0.6992778
##    D.T.includ  D.T.lock D.T.case D.T.icloud   D.T.see D.T.light D.T.devic
## 60          0 0.0000000        0  0.0000000 0.7059578         0         0
## 76          0 0.7436647        0  0.6960284 0.0000000         0         0
##    D.T.pleas  D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 60         0 0.6385995          0        0        0        0        0
## 76         0 0.0000000          0        0        0        0        0
##    D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come
## 60         0        0        0           0         0        0        0
## 76         0        0        0           0         0        0        0
##    D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 60 0.7479081          0         0          0            0        0
## 76 0.0000000          0         0          0            0        0
##    D.T.refurbish D.T.show D.T.shape D.T.read D.T.test D.T.pictur D.T.bare
## 60             0        0         0        0        0  0.7913985        0
## 76             0        0         0        0        0  0.0000000        0
##    D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini
## 60         0        0       0        0           0          0        0
## 76         0        0       0        0           0          0        0
##    D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding D.T.near
## 60          0          0       0       0             0        0        0
## 76          0          0       0       0             0        0        0
##    D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi D.T.order
## 60         0         0        0           0         0        0         0
## 76         0         0        0           0         0        0         0
##    D.T.protector D.T.kept D.T.right D.T.previous D.T.button D.T.alway
## 60             0        0         0            0          0         0
## 76             0        0         0            0          0         0
##    D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free
## 60           0        0       0       0        0          0        0
## 76           0        0       0       0        0          0        0
##    D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal
## 60        0        0        0        0              0          0
## 76        0        0        0        0              0          0
##    D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi
## 60          0         0        0         0       0          0        0
## 76          0         0        0         0       0          0        0
##    D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side
## 60          0         0         0           0              0        0
## 76          0         0         0           0              0        0
##    D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili
## 60           0        0        0       0           0           0
## 76           0        0        0       0           0           0
##    D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset D.T.upper
## 60            0       0        0         0           0         0
## 76            0       0        0         0           0         0
##    D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag D.T.qualiti
## 60           0          0        0           0        0           0
## 76           0          0        0           0        0           0
##    D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur D.T.technician
## 60         0       0         0           0          0              0
## 76         0       0         0           0          0              0
##    D.T.super D.T.expect D.T.sync D.T.speaker D.T.name D.T.lightn D.T.X2016
## 60         0          0        0           0        0          0         0
## 76         0          0        0           0        0          0         0
##    D.T.passcod D.T.money D.T.els D.T.stylus D.T.corpor D.T.intro
## 60           0         0       0          0          0         0
## 76           0         0       0          0          0         0
##    D.T.higher D.T.beetl D.T.defens D.T.disclaim D.T.essenti D.T.final
## 60          0         0          0            0           0         0
## 76          0         0          0            0           0         0
##    D.T.repeat. D.P.http D.P.mini D.P.air D.P.black D.P.white D.P.gold
## 60           0        0        0       0         0         0        0
## 76           0        0        0       0         0         0        0
##    D.P.spacegray
## 60             0
## 76             0
## [1] "min distance(-0.0000) pair:"
##     UniqueID sold.fctr prdl.my.descr.fctr
## 335    10335         N         iPadmini#1
## 663    10663         N         iPadmini#1
##                                                                                                  descr.my
## 335 This item is used but well taken care of no cosmetic wears at all . Any other questions feel free to 
## 663 This item is used but well taken care of no cosmetic wears at all . Any other questions feel free to 
##     D.T.condit   D.T.use D.T.scratch D.T.new D.T.good D.T.screen D.T.great
## 335          0 0.3190707           0       0        0          0         0
## 663          0 0.3190707           0       0        0          0         0
##     D.T.ipad D.T.work D.T.excel D.T.like D.T.box D.T.function.  D.T.item
## 335        0        0         0        0       0             0 0.4364355
## 663        0        0         0        0       0             0 0.4364355
##     D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack  D.T.wear
## 335         0  0.4505218         0        0         0 0.4717371
## 663         0  0.4505218         0        0         0 0.4717371
##     D.T.perfect D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light
## 335           0          0        0        0          0       0         0
## 663           0          0        0        0          0       0         0
##     D.T.devic D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign
## 335         0         0        0          0        0        0        0
## 663         0         0        0          0        0        0        0
##     D.T.open D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100
## 335        0         0        0        0           0         0        0
## 663        0         0        0        0           0         0        0
##     D.T.come D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript
## 335        0         0          0         0          0            0
## 663        0         0          0         0          0            0
##     D.T.unit D.T.refurbish D.T.show D.T.shape D.T.read D.T.test D.T.pictur
## 335        0             0        0         0        0        0          0
## 663        0             0        0         0        0        0          0
##     D.T.bare D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag
## 335        0         0        0       0        0           0          0
## 663        0         0        0       0        0           0          0
##     D.T.mini D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding
## 335        0          0          0       0       0             0        0
## 663        0          0          0       0       0             0        0
##     D.T.near D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi
## 335        0         0         0        0           0         0        0
## 663        0         0         0        0           0         0        0
##     D.T.order D.T.protector D.T.kept D.T.right D.T.previous D.T.button
## 335         0             0        0         0            0          0
## 663         0             0        0         0            0          0
##     D.T.alway D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag
## 335         0           0        0       0       0        0          0
## 663         0           0        0       0       0        0          0
##      D.T.free D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession
## 335 0.7127655        0        0        0        0              0
## 663 0.7127655        0        0        0        0              0
##     D.T.overal D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail
## 335          0          0         0        0         0       0          0
## 663          0          0         0        0         0       0          0
##     D.T.bodi D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur
## 335        0          0         0         0           0              0
## 663        0          0         0         0           0              0
##     D.T.side D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili
## 335        0           0        0        0       0           0           0
## 663        0           0        0        0       0           0           0
##     D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset D.T.upper
## 335            0       0        0         0           0         0
## 663            0       0        0         0           0         0
##     D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag D.T.qualiti
## 335           0          0        0           0        0           0
## 663           0          0        0           0        0           0
##     D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur D.T.technician
## 335         0       0         0           0          0              0
## 663         0       0         0           0          0              0
##     D.T.super D.T.expect D.T.sync D.T.speaker D.T.name D.T.lightn
## 335         0          0        0           0        0          0
## 663         0          0        0           0        0          0
##     D.T.X2016 D.T.passcod D.T.money D.T.els D.T.stylus D.T.corpor
## 335         0           0         0       0          0          0
## 663         0           0         0       0          0          0
##     D.T.intro D.T.higher D.T.beetl D.T.defens D.T.disclaim D.T.essenti
## 335         0          0         0          0            0           0
## 663         0          0         0          0            0           0
##     D.T.final D.T.repeat. D.P.http D.P.mini D.P.air D.P.black D.P.white
## 335         0           0        0        0       0         0         0
## 663         0           0        0        0       0         0         0
##     D.P.gold D.P.spacegray
## 335        0             0
## 663        0             0

## [1] "glb_allobs_df$prdl.my.descr.fctr$.clusterid Entropy: 0.6570 (96.9282 pct)"
##    prdl.my.descr.fctr.clusterid   N  Y  .entropy .knt
## 1                   Unknown#0_1  72 47 0.6709143  119
## 2                   Unknown#1_1  25 13 0.6424220   38
## 3                   Unknown#1_2  17  9 0.6450332   26
## 4                   Unknown#1_3   4  5 0.6869616    9
## 5                   Unknown#1_4   0  6 0.0000000    6
## 6                    iPad 1#0_1  53 69 0.6845225  122
## 7                    iPad 1#1_1  12 18 0.6730117   30
## 8                    iPad 1#1_2  13 16 0.6877868   29
## 9                    iPad 1#1_3  12  7 0.6581100   19
## 10                   iPad 1#1_4   8  7 0.6909233   15
## 11                   iPad 1#1_5   2  8 0.5004024   10
## 12                   iPad 2#0_1  57 80 0.6789878  137
## 13                   iPad 2#1_1  31 24 0.6850260   55
## 14                   iPad 2#1_2  10 14 0.6791933   24
## 15                   iPad 2#1_3  19  5 0.5117401   24
## 16                   iPad 2#1_4   9 11 0.6881388   20
## 17                   iPad 2#1_5   8  8 0.6931472   16
## 18                   iPad 2#1_6   7  5 0.6791933   12
## 19                  iPad 3+#0_1  58 87 0.6730117  145
## 20                  iPad 3+#1_1  26 20 0.6846163   46
## 21                  iPad 3+#1_2  25 14 0.6528258   39
## 22                  iPad 3+#1_3  14  6 0.6108643   20
## 23                  iPad 3+#1_4  16  2 0.3488321   18
## 24                  iPad 3+#1_5   9 11 0.6881388   20
## 25                  iPad 3+#1_6   5  5 0.6931472   10
## 26                  iPad 3+#1_7  13  0 0.0000000   13
## 27                  iPadAir#0_1 125 95 0.6838206  220
## 28                  iPadAir#1_1  30 25 0.6890092   55
## 29                  iPadAir#1_2  19 18 0.6927819   37
## 30                  iPadAir#1_3  13  6 0.6236548   19
## 31                  iPadAir#1_4  13  4 0.5455946   17
## 32                  iPadAir#1_5   3  2 0.6730117    5
## 33              iPadmini 2+#0_1  95 59 0.6655694  154
## 34              iPadmini 2+#1_1  12  8 0.6730117   20
## 35              iPadmini 2+#1_2   7 12 0.6581100   19
## 36              iPadmini 2+#1_3   5  1 0.4505612    6
## 37              iPadmini 2+#1_4   6  0 0.0000000    6
## 38                 iPadmini#0_1  94 79 0.6893836  173
## 39                 iPadmini#1_1  13  9 0.6765260   22
## 40                 iPadmini#1_2  13  7 0.6474466   20
## 41                 iPadmini#1_3   8  7 0.6909233   15
## 42                 iPadmini#1_4   2 11 0.4293230   13
## 43                 iPadmini#1_5   5  8 0.6662784   13
## 44                 iPadmini#1_6   6  9 0.6730117   15
## 45                 iPadmini#1_7   5  3 0.6615632    8
# Last call for data modifications 
# (The two lines below are disabled debugging/one-off patch snippets kept for
#  reference; they are comments and do not execute.)
#stop(here") # sav_allobs_df <- glb_allobs_df
# glb_allobs_df[(glb_allobs_df$PropR == 0.75) & (glb_allobs_df$State == "Hawaii"), "PropR.fctr"] <- "N"

# Re-partition
# Rebuild the training and new-observation frames from the combined frame so
# they reflect whatever columns/values glb_allobs_df holds at this point.
# Rows are selected by the .src column: "Train" -> glb_trnobs_df,
# "Test" -> glb_newobs_df.
glb_trnobs_df <- subset(glb_allobs_df, .src == "Train")
glb_newobs_df <- subset(glb_allobs_df, .src == "Test")

# Register the start of the "select.features" step in the chunk log.
# NOTE(review): myadd_chunk is defined elsewhere; judging by the table echoed
# right after this call, major.inc=TRUE presumably bumps step_major and resets
# step_minor to 0 -- confirm against the helper's definition.
glb_chunks_df <- myadd_chunk(glb_chunks_df, "select.features", major.inc=TRUE)
##                 label step_major step_minor    bgn    end elapsed
## 7 manage.missing.data          4          1 54.646 60.319   5.673
## 8     select.features          5          0 60.320     NA      NA

Step 5.0: select features

# Debugging aids (kept commented out): halt here, or snapshot / restore glb_allobs_df.
#stop("here"); sav_allobs_df <- glb_allobs_df; glb_allobs_df <- sav_allobs_df

# Build the feature table from the training observations. As printed, the
# result has one row per candidate variable with columns id, cor.y,
# exclude.as.feat and cor.y.abs; variables with zero standard deviation get
# cor.y = NA (hence the cor() warning).
glb_feats_df <- myselect_features(
    entity_df                = glb_trnobs_df,
    exclude_vars_as_features = glb_exclude_vars_as_features,
    rsp_var                  = glb_rsp_var
)
print(glb_feats_df)
## Warning in cor(data.matrix(entity_df[, sel_feats]), y =
## as.numeric(entity_df[, : the standard deviation is zero
##                                                      id         cor.y
## sold                                               sold  1.0000000000
## biddable                                       biddable  0.5481788380
## startprice.log                           startprice.log -0.4674275376
## startprice                                   startprice -0.4569767211
## startprice.predict.                 startprice.predict. -0.3573141534
## startprice.diff                         startprice.diff -0.2794223471
## UniqueID                                       UniqueID -0.1895466260
## idseq.my                                       idseq.my -0.1895466260
## condition.fctr                           condition.fctr -0.1535490071
## D.T.hous                                       D.T.hous -0.1373919817
## D.npnct05.log                             D.npnct05.log -0.1180558939
## D.T.X100                                       D.T.X100 -0.1150127028
## D.T.near                                       D.T.near -0.0929819941
## D.T.list                                       D.T.list -0.0870905528
## D.T.fair                                       D.T.fair -0.0802848689
## D.terms.n.post.stop                 D.terms.n.post.stop -0.0800729927
## D.terms.n.post.stem                 D.terms.n.post.stem -0.0798677390
## D.npnct14.log                             D.npnct14.log -0.0786203827
## D.T.cosmet                                   D.T.cosmet -0.0777513602
## cellular.fctr                             cellular.fctr -0.0743297381
## D.T.profession                           D.T.profession -0.0712586605
## D.T.tab                                         D.T.tab -0.0707242028
## D.terms.n.post.stop.log         D.terms.n.post.stop.log -0.0638651730
## D.terms.n.post.stem.log         D.terms.n.post.stem.log -0.0638431167
## D.nwrds.unq.log                         D.nwrds.unq.log -0.0638431167
## D.ndgts.log                                 D.ndgts.log -0.0628684727
## D.T.overal                                   D.T.overal -0.0621057222
## D.npnct09.log                             D.npnct09.log -0.0618253281
## D.T.mint                                       D.T.mint -0.0610303678
## D.T.stock                                     D.T.stock -0.0607284075
## carrier.fctr                               carrier.fctr -0.0599089237
## D.T.alway                                     D.T.alway  0.0593444093
## D.npnct12.log                             D.npnct12.log -0.0593256462
## D.nwrds.log                                 D.nwrds.log -0.0588147403
## D.T.test                                       D.T.test -0.0586208334
## D.T.seller                                   D.T.seller -0.0584897212
## D.T.inspect                                 D.T.inspect -0.0568460093
## D.T.affect                                   D.T.affect -0.0566799690
## D.nchrs.log                                 D.nchrs.log -0.0565357348
## D.T.box                                         D.T.box -0.0563485768
## D.T.like                                       D.T.like -0.0557801451
## D.T.averag                                   D.T.averag -0.0555976359
## D.T.descript                               D.T.descript  0.0553987246
## D.nuppr.log                                 D.nuppr.log -0.0553358386
## D.ratio.nstopwrds.nwrds         D.ratio.nstopwrds.nwrds  0.0537832223
## D.T.phone                                     D.T.phone -0.0527118662
## D.T.origin                                   D.T.origin -0.0525252573
## D.T.left                                       D.T.left  0.0525031466
## D.npnct28.log                             D.npnct28.log -0.0524583244
## D.T.esn                                         D.T.esn -0.0517020813
## D.T.bare                                       D.T.bare -0.0509186819
## D.T.perfect                                 D.T.perfect -0.0504871511
## D.T.devic                                     D.T.devic -0.0504727874
## D.T.refer                                     D.T.refer  0.0503000028
## D.T.least                                     D.T.least  0.0500485566
## D.npnct06.log                             D.npnct06.log -0.0499761958
## D.T.wifi                                       D.T.wifi -0.0499453504
## D.T.handset                                 D.T.handset  0.0486468119
## D.npnct15.log                             D.npnct15.log  0.0484022793
## D.T.minor                                     D.T.minor -0.0483597041
## D.T.ship                                       D.T.ship -0.0483492299
## D.T.free                                       D.T.free -0.0478266395
## D.nstopwrds.log                         D.nstopwrds.log -0.0474681704
## D.npnct24.log                             D.npnct24.log -0.0458449965
## D.T.previous                               D.T.previous  0.0453194378
## D.npnct16.log                             D.npnct16.log -0.0449403962
## D.T.refurbish                             D.T.refurbish -0.0449149382
## D.T.two                                         D.T.two  0.0447161329
## D.T.top                                         D.T.top  0.0433671354
## D.T.technician                           D.T.technician -0.0430848435
## D.T.sync                                       D.T.sync -0.0430848435
## D.T.condit                                   D.T.condit -0.0418798096
## prdline.my.fctr                         prdline.my.fctr -0.0415814340
## D.T.sign                                       D.T.sign  0.0412800974
## D.T.function.                             D.T.function.  0.0397438087
## D.T.non                                         D.T.non  0.0397064496
## D.npnct08.log                             D.npnct08.log -0.0396513123
## D.T.heavili                                 D.T.heavili -0.0391978700
## color.fctr                                   color.fctr -0.0391372902
## D.T.certifi                                 D.T.certifi -0.0385931627
## D.T.broken                                   D.T.broken  0.0380340254
## D.npnct13.log                             D.npnct13.log -0.0373463069
## D.T.correct                                 D.T.correct -0.0373025158
## D.T.featur                                   D.T.featur -0.0373025158
## D.T.new                                         D.T.new -0.0372353149
## prdl.my.descr.fctr                   prdl.my.descr.fctr -0.0358461598
## D.T.contain                                 D.T.contain -0.0355839439
## D.T.corpor                                   D.T.corpor  0.0353706112
## D.T.name                                       D.T.name  0.0352663148
## D.T.will                                       D.T.will -0.0350451461
## D.T.scroll                                   D.T.scroll  0.0339028710
## D.T.button                                   D.T.button -0.0338831937
## D.T.crack                                     D.T.crack  0.0337091183
## D.T.imag                                       D.T.imag  0.0335537013
## D.T.minim                                     D.T.minim -0.0330865270
## D.TfIdf.sum.post.stem             D.TfIdf.sum.post.stem -0.0323742743
## D.sum.TfIdf                                 D.sum.TfIdf -0.0323742743
## .clusterid                                   .clusterid -0.0318360231
## .clusterid.fctr                         .clusterid.fctr -0.0318360231
## D.T.imei                                       D.T.imei -0.0308865534
## D.T.display                                 D.T.display -0.0307725689
## D.P.gold                                       D.P.gold -0.0304491748
## D.TfIdf.sum.post.stop             D.TfIdf.sum.post.stop -0.0303366192
## D.T.passcod                                 D.T.passcod -0.0303239925
## D.T.charger                                 D.T.charger  0.0301602937
## D.T.cabl                                       D.T.cabl -0.0296202577
## D.T.pic                                         D.T.pic -0.0292106355
## D.T.blemish                                 D.T.blemish -0.0291642840
## D.T.shape                                     D.T.shape  0.0288373334
## D.T.appl                                       D.T.appl -0.0287074081
## D.T.back                                       D.T.back  0.0272103292
## D.T.pictur                                   D.T.pictur  0.0271676520
## D.T.excel                                     D.T.excel  0.0265819659
## D.T.dent                                       D.T.dent  0.0264976918
## D.T.digit                                     D.T.digit  0.0261375762
## D.npnct03.log                             D.npnct03.log  0.0257637868
## D.T.bodi                                       D.T.bodi -0.0252978602
## D.T.qualiti                                 D.T.qualiti -0.0252899986
## D.T.super                                     D.T.super  0.0250040676
## D.T.els                                         D.T.els  0.0250040676
## D.T.disclaim                               D.T.disclaim  0.0250040676
## D.T.essenti                                 D.T.essenti  0.0250040676
## D.T.repeat.                                 D.T.repeat.  0.0250040676
## D.npnct07.log                             D.npnct07.log  0.0250040676
## D.T.light                                     D.T.light -0.0249083615
## D.T.fulli                                     D.T.fulli  0.0243976808
## D.npnct10.log                             D.npnct10.log -0.0241015016
## D.T.screen                                   D.T.screen  0.0232373651
## D.T.unit                                       D.T.unit -0.0231226779
## D.T.read                                       D.T.read -0.0226911615
## D.T.may                                         D.T.may  0.0225762388
## D.T.contact                                 D.T.contact  0.0218134520
## D.T.money                                     D.T.money -0.0215250231
## D.T.higher                                   D.T.higher -0.0215250231
## D.T.beetl                                     D.T.beetl -0.0215250231
## D.T.defens                                   D.T.defens -0.0215250231
## D.T.final                                     D.T.final -0.0215250231
## D.npnct18.log                             D.npnct18.log -0.0215250231
## D.T.kept                                       D.T.kept  0.0207141990
## D.T.tear                                       D.T.tear  0.0205481770
## D.T.open                                       D.T.open -0.0193952625
## D.npnct11.log                             D.npnct11.log -0.0192035548
## D.T.order                                     D.T.order -0.0188854872
## D.P.white                                     D.P.white  0.0184898845
## D.T.pleas                                     D.T.pleas  0.0178751143
## D.T.ipad                                       D.T.ipad -0.0177348755
## D.terms.n.stem.stop.Ratio     D.terms.n.stem.stop.Ratio  0.0175790908
## D.T.lock                                       D.T.lock  0.0173034228
## D.T.activ                                     D.T.activ -0.0166891768
## D.T.anoth                                     D.T.anoth  0.0164535903
## D.T.damag                                     D.T.damag -0.0160662651
## D.T.ding                                       D.T.ding  0.0155885916
## D.T.full                                       D.T.full  0.0149589509
## D.T.work                                       D.T.work -0.0145594907
## D.T.detail                                   D.T.detail -0.0139118798
## D.T.item                                       D.T.item -0.0134922662
## D.T.stylus                                   D.T.stylus -0.0125154705
## D.T.packag                                   D.T.packag  0.0124597147
## storage.fctr                               storage.fctr -0.0116754969
## D.T.edg                                         D.T.edg  0.0114844118
## D.T.must                                       D.T.must  0.0113915486
## D.P.mini                                       D.P.mini -0.0112418293
## D.T.photo                                     D.T.photo  0.0110333858
## D.T.problem                                 D.T.problem  0.0107375772
## D.T.seal                                       D.T.seal  0.0106898740
## D.T.come                                       D.T.come -0.0104488093
## D.T.corner                                   D.T.corner -0.0104287544
## D.T.brand                                     D.T.brand -0.0103861855
## D.T.use                                         D.T.use  0.0103720246
## D.T.scuff                                     D.T.scuff  0.0101340501
## D.T.lightn                                   D.T.lightn -0.0099034064
## D.T.speaker                                 D.T.speaker  0.0096402551
## D.ratio.sum.TfIdf.nwrds         D.ratio.sum.TfIdf.nwrds  0.0096247411
## D.P.air                                         D.P.air -0.0092629952
## D.T.includ                                   D.T.includ -0.0091767476
## D.T.side                                       D.T.side  0.0089049983
## D.T.mark                                       D.T.mark -0.0088438689
## D.T.scratch                                 D.T.scratch -0.0088060862
## D.T.icloud                                   D.T.icloud  0.0086539687
## D.T.keyboard                               D.T.keyboard  0.0082735718
## D.T.right                                     D.T.right -0.0080547459
## D.T.upper                                     D.T.upper  0.0078374765
## D.T.manufactur                           D.T.manufactur  0.0077942218
## D.T.mini                                       D.T.mini -0.0075528886
## D.T.sinc                                       D.T.sinc  0.0072330260
## D.T.great                                     D.T.great  0.0070063865
## D.T.chip                                       D.T.chip -0.0067464224
## D.T.geek                                       D.T.geek -0.0064074827
## D.T.squad                                     D.T.squad -0.0064074827
## D.T.protector                             D.T.protector  0.0057850197
## D.T.case                                       D.T.case  0.0057562564
## D.T.wear                                       D.T.wear -0.0048789708
## D.T.sticker                                 D.T.sticker  0.0042625126
## D.T.retail                                   D.T.retail -0.0042217335
## D.T.see                                         D.T.see  0.0041433566
## D.npnct01.log                             D.npnct01.log  0.0041255300
## D.P.spacegray                             D.P.spacegray  0.0034818565
## D.T.air                                         D.T.air -0.0029579942
## D.T.clean                                     D.T.clean  0.0025337892
## D.T.normal                                   D.T.normal  0.0019081337
## D.TfIdf.sum.stem.stop.Ratio D.TfIdf.sum.stem.stop.Ratio -0.0014568383
## .rnorm                                           .rnorm -0.0014350110
## D.P.black                                     D.P.black -0.0012485463
## D.T.small                                     D.T.small -0.0010738542
## D.T.accessori                             D.T.accessori  0.0007963083
## D.T.X2016                                     D.T.X2016 -0.0005289068
## D.T.good                                       D.T.good -0.0004368629
## D.T.show                                       D.T.show -0.0003156554
## D.T.expect                                   D.T.expect            NA
## D.T.intro                                     D.T.intro            NA
## D.npnct02.log                             D.npnct02.log            NA
## D.npnct04.log                             D.npnct04.log            NA
## D.npnct17.log                             D.npnct17.log            NA
## D.npnct19.log                             D.npnct19.log            NA
## D.npnct20.log                             D.npnct20.log            NA
## D.npnct21.log                             D.npnct21.log            NA
## D.npnct22.log                             D.npnct22.log            NA
## D.npnct23.log                             D.npnct23.log            NA
## D.npnct25.log                             D.npnct25.log            NA
## D.npnct26.log                             D.npnct26.log            NA
## D.npnct27.log                             D.npnct27.log            NA
## D.npnct29.log                             D.npnct29.log            NA
## D.npnct30.log                             D.npnct30.log            NA
## D.P.http                                       D.P.http            NA
##                             exclude.as.feat    cor.y.abs
## sold                                      1 1.0000000000
## biddable                                  0 0.5481788380
## startprice.log                            1 0.4674275376
## startprice                                1 0.4569767211
## startprice.predict.                       1 0.3573141534
## startprice.diff                           0 0.2794223471
## UniqueID                                  1 0.1895466260
## idseq.my                                  0 0.1895466260
## condition.fctr                            0 0.1535490071
## D.T.hous                                  1 0.1373919817
## D.npnct05.log                             0 0.1180558939
## D.T.X100                                  1 0.1150127028
## D.T.near                                  1 0.0929819941
## D.T.list                                  1 0.0870905528
## D.T.fair                                  1 0.0802848689
## D.terms.n.post.stop                       0 0.0800729927
## D.terms.n.post.stem                       0 0.0798677390
## D.npnct14.log                             0 0.0786203827
## D.T.cosmet                                1 0.0777513602
## cellular.fctr                             0 0.0743297381
## D.T.profession                            1 0.0712586605
## D.T.tab                                   1 0.0707242028
## D.terms.n.post.stop.log                   0 0.0638651730
## D.terms.n.post.stem.log                   0 0.0638431167
## D.nwrds.unq.log                           0 0.0638431167
## D.ndgts.log                               0 0.0628684727
## D.T.overal                                1 0.0621057222
## D.npnct09.log                             0 0.0618253281
## D.T.mint                                  1 0.0610303678
## D.T.stock                                 1 0.0607284075
## carrier.fctr                              0 0.0599089237
## D.T.alway                                 1 0.0593444093
## D.npnct12.log                             0 0.0593256462
## D.nwrds.log                               0 0.0588147403
## D.T.test                                  1 0.0586208334
## D.T.seller                                1 0.0584897212
## D.T.inspect                               1 0.0568460093
## D.T.affect                                1 0.0566799690
## D.nchrs.log                               0 0.0565357348
## D.T.box                                   1 0.0563485768
## D.T.like                                  1 0.0557801451
## D.T.averag                                1 0.0555976359
## D.T.descript                              1 0.0553987246
## D.nuppr.log                               0 0.0553358386
## D.ratio.nstopwrds.nwrds                   0 0.0537832223
## D.T.phone                                 1 0.0527118662
## D.T.origin                                1 0.0525252573
## D.T.left                                  1 0.0525031466
## D.npnct28.log                             0 0.0524583244
## D.T.esn                                   1 0.0517020813
## D.T.bare                                  1 0.0509186819
## D.T.perfect                               1 0.0504871511
## D.T.devic                                 1 0.0504727874
## D.T.refer                                 1 0.0503000028
## D.T.least                                 1 0.0500485566
## D.npnct06.log                             0 0.0499761958
## D.T.wifi                                  1 0.0499453504
## D.T.handset                               1 0.0486468119
## D.npnct15.log                             0 0.0484022793
## D.T.minor                                 1 0.0483597041
## D.T.ship                                  1 0.0483492299
## D.T.free                                  1 0.0478266395
## D.nstopwrds.log                           0 0.0474681704
## D.npnct24.log                             0 0.0458449965
## D.T.previous                              1 0.0453194378
## D.npnct16.log                             0 0.0449403962
## D.T.refurbish                             1 0.0449149382
## D.T.two                                   1 0.0447161329
## D.T.top                                   1 0.0433671354
## D.T.technician                            1 0.0430848435
## D.T.sync                                  1 0.0430848435
## D.T.condit                                1 0.0418798096
## prdline.my.fctr                           1 0.0415814340
## D.T.sign                                  1 0.0412800974
## D.T.function.                             1 0.0397438087
## D.T.non                                   1 0.0397064496
## D.npnct08.log                             0 0.0396513123
## D.T.heavili                               1 0.0391978700
## color.fctr                                0 0.0391372902
## D.T.certifi                               1 0.0385931627
## D.T.broken                                1 0.0380340254
## D.npnct13.log                             0 0.0373463069
## D.T.correct                               1 0.0373025158
## D.T.featur                                1 0.0373025158
## D.T.new                                   1 0.0372353149
## prdl.my.descr.fctr                        0 0.0358461598
## D.T.contain                               1 0.0355839439
## D.T.corpor                                1 0.0353706112
## D.T.name                                  1 0.0352663148
## D.T.will                                  1 0.0350451461
## D.T.scroll                                1 0.0339028710
## D.T.button                                1 0.0338831937
## D.T.crack                                 1 0.0337091183
## D.T.imag                                  1 0.0335537013
## D.T.minim                                 1 0.0330865270
## D.TfIdf.sum.post.stem                     0 0.0323742743
## D.sum.TfIdf                               0 0.0323742743
## .clusterid                                1 0.0318360231
## .clusterid.fctr                           0 0.0318360231
## D.T.imei                                  1 0.0308865534
## D.T.display                               1 0.0307725689
## D.P.gold                                  1 0.0304491748
## D.TfIdf.sum.post.stop                     0 0.0303366192
## D.T.passcod                               1 0.0303239925
## D.T.charger                               1 0.0301602937
## D.T.cabl                                  1 0.0296202577
## D.T.pic                                   1 0.0292106355
## D.T.blemish                               1 0.0291642840
## D.T.shape                                 1 0.0288373334
## D.T.appl                                  1 0.0287074081
## D.T.back                                  1 0.0272103292
## D.T.pictur                                1 0.0271676520
## D.T.excel                                 1 0.0265819659
## D.T.dent                                  1 0.0264976918
## D.T.digit                                 1 0.0261375762
## D.npnct03.log                             0 0.0257637868
## D.T.bodi                                  1 0.0252978602
## D.T.qualiti                               1 0.0252899986
## D.T.super                                 1 0.0250040676
## D.T.els                                   1 0.0250040676
## D.T.disclaim                              1 0.0250040676
## D.T.essenti                               1 0.0250040676
## D.T.repeat.                               1 0.0250040676
## D.npnct07.log                             0 0.0250040676
## D.T.light                                 1 0.0249083615
## D.T.fulli                                 1 0.0243976808
## D.npnct10.log                             0 0.0241015016
## D.T.screen                                1 0.0232373651
## D.T.unit                                  1 0.0231226779
## D.T.read                                  1 0.0226911615
## D.T.may                                   1 0.0225762388
## D.T.contact                               1 0.0218134520
## D.T.money                                 1 0.0215250231
## D.T.higher                                1 0.0215250231
## D.T.beetl                                 1 0.0215250231
## D.T.defens                                1 0.0215250231
## D.T.final                                 1 0.0215250231
## D.npnct18.log                             0 0.0215250231
## D.T.kept                                  1 0.0207141990
## D.T.tear                                  1 0.0205481770
## D.T.open                                  1 0.0193952625
## D.npnct11.log                             0 0.0192035548
## D.T.order                                 1 0.0188854872
## D.P.white                                 1 0.0184898845
## D.T.pleas                                 1 0.0178751143
## D.T.ipad                                  1 0.0177348755
## D.terms.n.stem.stop.Ratio                 0 0.0175790908
## D.T.lock                                  1 0.0173034228
## D.T.activ                                 1 0.0166891768
## D.T.anoth                                 1 0.0164535903
## D.T.damag                                 1 0.0160662651
## D.T.ding                                  1 0.0155885916
## D.T.full                                  1 0.0149589509
## D.T.work                                  1 0.0145594907
## D.T.detail                                1 0.0139118798
## D.T.item                                  1 0.0134922662
## D.T.stylus                                1 0.0125154705
## D.T.packag                                1 0.0124597147
## storage.fctr                              0 0.0116754969
## D.T.edg                                   1 0.0114844118
## D.T.must                                  1 0.0113915486
## D.P.mini                                  1 0.0112418293
## D.T.photo                                 1 0.0110333858
## D.T.problem                               1 0.0107375772
## D.T.seal                                  1 0.0106898740
## D.T.come                                  1 0.0104488093
## D.T.corner                                1 0.0104287544
## D.T.brand                                 1 0.0103861855
## D.T.use                                   1 0.0103720246
## D.T.scuff                                 1 0.0101340501
## D.T.lightn                                1 0.0099034064
## D.T.speaker                               1 0.0096402551
## D.ratio.sum.TfIdf.nwrds                   0 0.0096247411
## D.P.air                                   1 0.0092629952
## D.T.includ                                1 0.0091767476
## D.T.side                                  1 0.0089049983
## D.T.mark                                  1 0.0088438689
## D.T.scratch                               1 0.0088060862
## D.T.icloud                                1 0.0086539687
## D.T.keyboard                              1 0.0082735718
## D.T.right                                 1 0.0080547459
## D.T.upper                                 1 0.0078374765
## D.T.manufactur                            1 0.0077942218
## D.T.mini                                  1 0.0075528886
## D.T.sinc                                  1 0.0072330260
## D.T.great                                 1 0.0070063865
## D.T.chip                                  1 0.0067464224
## D.T.geek                                  1 0.0064074827
## D.T.squad                                 1 0.0064074827
## D.T.protector                             1 0.0057850197
## D.T.case                                  1 0.0057562564
## D.T.wear                                  1 0.0048789708
## D.T.sticker                               1 0.0042625126
## D.T.retail                                1 0.0042217335
## D.T.see                                   1 0.0041433566
## D.npnct01.log                             0 0.0041255300
## D.P.spacegray                             1 0.0034818565
## D.T.air                                   1 0.0029579942
## D.T.clean                                 1 0.0025337892
## D.T.normal                                1 0.0019081337
## D.TfIdf.sum.stem.stop.Ratio               0 0.0014568383
## .rnorm                                    0 0.0014350110
## D.P.black                                 1 0.0012485463
## D.T.small                                 1 0.0010738542
## D.T.accessori                             1 0.0007963083
## D.T.X2016                                 1 0.0005289068
## D.T.good                                  1 0.0004368629
## D.T.show                                  1 0.0003156554
## D.T.expect                                1           NA
## D.T.intro                                 1           NA
## D.npnct02.log                             0           NA
## D.npnct04.log                             0           NA
## D.npnct17.log                             0           NA
## D.npnct19.log                             0           NA
## D.npnct20.log                             0           NA
## D.npnct21.log                             0           NA
## D.npnct22.log                             0           NA
## D.npnct23.log                             0           NA
## D.npnct25.log                             0           NA
## D.npnct26.log                             0           NA
## D.npnct27.log                             0           NA
## D.npnct29.log                             0           NA
## D.npnct30.log                             0           NA
## D.P.http                                  1           NA
# Debugging aid (kept commented out): snapshot / restore glb_feats_df.
# sav_feats_df <- glb_feats_df; glb_feats_df <- sav_feats_df

# Run the pairwise-correlation screen over the candidate features, then
# re-rank the feature table by descending cor.y. While running,
# myfind_cor_features prints each highly correlated pair with its correlation
# to the response and warns about the feature it identifies in each pair.
glb_feats_df <- orderBy(
    ~ -cor.y,
    myfind_cor_features(feats_df = glb_feats_df,
                        obs_df   = glb_trnobs_df,
                        rsp_var  = glb_rsp_var)
)
print(glb_feats_df)
## [1] "cor(D.TfIdf.sum.post.stem, D.sum.TfIdf)=1.0000"
## [1] "cor(sold.fctr, D.TfIdf.sum.post.stem)=-0.0324"
## [1] "cor(sold.fctr, D.sum.TfIdf)=-0.0324"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.sum.TfIdf as highly correlated with
## D.TfIdf.sum.post.stem
## [1] "cor(D.nwrds.unq.log, D.terms.n.post.stem.log)=1.0000"
## [1] "cor(sold.fctr, D.nwrds.unq.log)=-0.0638"
## [1] "cor(sold.fctr, D.terms.n.post.stem.log)=-0.0638"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.terms.n.post.stem.log as highly correlated
## with D.nwrds.unq.log
## [1] "cor(D.nwrds.unq.log, D.terms.n.post.stop.log)=0.9999"
## [1] "cor(sold.fctr, D.nwrds.unq.log)=-0.0638"
## [1] "cor(sold.fctr, D.terms.n.post.stop.log)=-0.0639"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.nwrds.unq.log as highly correlated with
## D.terms.n.post.stop.log
## [1] "cor(D.nchrs.log, D.nuppr.log)=0.9995"
## [1] "cor(sold.fctr, D.nchrs.log)=-0.0565"
## [1] "cor(sold.fctr, D.nuppr.log)=-0.0553"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.nuppr.log as highly correlated with
## D.nchrs.log
## [1] "cor(D.terms.n.post.stem, D.terms.n.post.stop)=0.9991"
## [1] "cor(sold.fctr, D.terms.n.post.stem)=-0.0799"
## [1] "cor(sold.fctr, D.terms.n.post.stop)=-0.0801"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.terms.n.post.stem as highly correlated with
## D.terms.n.post.stop
## [1] "cor(D.TfIdf.sum.post.stem, D.TfIdf.sum.post.stop)=0.9981"
## [1] "cor(sold.fctr, D.TfIdf.sum.post.stem)=-0.0324"
## [1] "cor(sold.fctr, D.TfIdf.sum.post.stop)=-0.0303"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.TfIdf.sum.post.stop as highly correlated with
## D.TfIdf.sum.post.stem
## [1] "cor(D.nchrs.log, D.terms.n.post.stop.log)=0.9932"
## [1] "cor(sold.fctr, D.nchrs.log)=-0.0565"
## [1] "cor(sold.fctr, D.terms.n.post.stop.log)=-0.0639"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.nchrs.log as highly correlated with
## D.terms.n.post.stop.log
## [1] "cor(D.nwrds.log, D.terms.n.post.stop.log)=0.9932"
## [1] "cor(sold.fctr, D.nwrds.log)=-0.0588"
## [1] "cor(sold.fctr, D.terms.n.post.stop.log)=-0.0639"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.nwrds.log as highly correlated with
## D.terms.n.post.stop.log
## [1] "cor(D.terms.n.post.stop, D.terms.n.post.stop.log)=0.9755"
## [1] "cor(sold.fctr, D.terms.n.post.stop)=-0.0801"
## [1] "cor(sold.fctr, D.terms.n.post.stop.log)=-0.0639"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.terms.n.post.stop.log as highly correlated
## with D.terms.n.post.stop
## [1] "cor(D.npnct24.log, D.ratio.nstopwrds.nwrds)=-0.9654"
## [1] "cor(sold.fctr, D.npnct24.log)=-0.0458"
## [1] "cor(sold.fctr, D.ratio.nstopwrds.nwrds)=0.0538"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.npnct24.log as highly correlated with
## D.ratio.nstopwrds.nwrds
## [1] "cor(D.npnct06.log, D.npnct16.log)=0.9556"
## [1] "cor(sold.fctr, D.npnct06.log)=-0.0500"
## [1] "cor(sold.fctr, D.npnct16.log)=-0.0449"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.npnct16.log as highly correlated with
## D.npnct06.log
## [1] "cor(D.TfIdf.sum.post.stem, D.ratio.nstopwrds.nwrds)=-0.9291"
## [1] "cor(sold.fctr, D.TfIdf.sum.post.stem)=-0.0324"
## [1] "cor(sold.fctr, D.ratio.nstopwrds.nwrds)=0.0538"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.TfIdf.sum.post.stem as highly correlated with
## D.ratio.nstopwrds.nwrds
## [1] "cor(D.nstopwrds.log, D.terms.n.post.stop)=0.8888"
## [1] "cor(sold.fctr, D.nstopwrds.log)=-0.0475"
## [1] "cor(sold.fctr, D.terms.n.post.stop)=-0.0801"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.nstopwrds.log as highly correlated with
## D.terms.n.post.stop
## [1] "cor(D.ratio.nstopwrds.nwrds, D.terms.n.post.stop)=-0.8707"
## [1] "cor(sold.fctr, D.ratio.nstopwrds.nwrds)=0.0538"
## [1] "cor(sold.fctr, D.terms.n.post.stop)=-0.0801"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.ratio.nstopwrds.nwrds as highly correlated
## with D.terms.n.post.stop
## [1] "cor(D.npnct13.log, D.terms.n.post.stop)=0.7383"
## [1] "cor(sold.fctr, D.npnct13.log)=-0.0373"
## [1] "cor(sold.fctr, D.terms.n.post.stop)=-0.0801"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.npnct13.log as highly correlated with
## D.terms.n.post.stop
## [1] "cor(carrier.fctr, cellular.fctr)=0.7131"
## [1] "cor(sold.fctr, carrier.fctr)=-0.0599"
## [1] "cor(sold.fctr, cellular.fctr)=-0.0743"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified carrier.fctr as highly correlated with
## cellular.fctr
##                              id         cor.y exclude.as.feat    cor.y.abs
## 219                        sold  1.0000000000               1 1.0000000000
## 211                    biddable  0.5481788380               0 0.5481788380
## 17                    D.T.alway  0.0593444093               1 0.0593444093
## 48                 D.T.descript  0.0553987246               1 0.0553987246
## 202     D.ratio.nstopwrds.nwrds  0.0537832223               0 0.0537832223
## 86                     D.T.left  0.0525031466               1 0.0525031466
## 124                   D.T.refer  0.0503000028               1 0.0503000028
## 85                    D.T.least  0.0500485566               1 0.0500485566
## 71                  D.T.handset  0.0486468119               1 0.0486468119
## 182               D.npnct15.log  0.0484022793               0 0.0484022793
## 118                D.T.previous  0.0453194378               1 0.0453194378
## 155                     D.T.two  0.0447161329               1 0.0447161329
## 154                     D.T.top  0.0433671354               1 0.0433671354
## 140                    D.T.sign  0.0412800974               1 0.0412800974
## 67                D.T.function.  0.0397438087               1 0.0397438087
## 104                     D.T.non  0.0397064496               1 0.0397064496
## 28                   D.T.broken  0.0380340254               1 0.0380340254
## 41                   D.T.corpor  0.0353706112               1 0.0353706112
## 101                    D.T.name  0.0352663148               1 0.0352663148
## 131                  D.T.scroll  0.0339028710               1 0.0339028710
## 44                    D.T.crack  0.0337091183               1 0.0337091183
## 76                     D.T.imag  0.0335537013               1 0.0335537013
## 33                  D.T.charger  0.0301602937               1 0.0301602937
## 136                   D.T.shape  0.0288373334               1 0.0288373334
## 21                     D.T.back  0.0272103292               1 0.0272103292
## 116                  D.T.pictur  0.0271676520               1 0.0271676520
## 59                    D.T.excel  0.0265819659               1 0.0265819659
## 47                     D.T.dent  0.0264976918               1 0.0264976918
## 51                    D.T.digit  0.0261375762               1 0.0261375762
## 170               D.npnct03.log  0.0257637868               0 0.0257637868
## 53                 D.T.disclaim  0.0250040676               1 0.0250040676
## 56                      D.T.els  0.0250040676               1 0.0250040676
## 58                  D.T.essenti  0.0250040676               1 0.0250040676
## 126                 D.T.repeat.  0.0250040676               1 0.0250040676
## 148                   D.T.super  0.0250040676               1 0.0250040676
## 174               D.npnct07.log  0.0250040676               0 0.0250040676
## 66                    D.T.fulli  0.0243976808               1 0.0243976808
## 130                  D.T.screen  0.0232373651               1 0.0232373651
## 94                      D.T.may  0.0225762388               1 0.0225762388
## 38                  D.T.contact  0.0218134520               1 0.0218134520
## 83                     D.T.kept  0.0207141990               1 0.0207141990
## 151                    D.T.tear  0.0205481770               1 0.0205481770
## 10                    D.P.white  0.0184898845               1 0.0184898845
## 117                   D.T.pleas  0.0178751143               1 0.0178751143
## 209   D.terms.n.stem.stop.Ratio  0.0175790908               0 0.0175790908
## 91                     D.T.lock  0.0173034228               1 0.0173034228
## 18                    D.T.anoth  0.0164535903               1 0.0164535903
## 52                     D.T.ding  0.0155885916               1 0.0155885916
## 65                     D.T.full  0.0149589509               1 0.0149589509
## 110                  D.T.packag  0.0124597147               1 0.0124597147
## 55                      D.T.edg  0.0114844118               1 0.0114844118
## 100                    D.T.must  0.0113915486               1 0.0113915486
## 114                   D.T.photo  0.0110333858               1 0.0110333858
## 119                 D.T.problem  0.0107375772               1 0.0107375772
## 133                    D.T.seal  0.0106898740               1 0.0106898740
## 158                     D.T.use  0.0103720246               1 0.0103720246
## 132                   D.T.scuff  0.0101340501               1 0.0101340501
## 143                 D.T.speaker  0.0096402551               1 0.0096402551
## 203     D.ratio.sum.TfIdf.nwrds  0.0096247411               0 0.0096247411
## 139                    D.T.side  0.0089049983               1 0.0089049983
## 75                   D.T.icloud  0.0086539687               1 0.0086539687
## 84                 D.T.keyboard  0.0082735718               1 0.0082735718
## 157                   D.T.upper  0.0078374765               1 0.0078374765
## 92               D.T.manufactur  0.0077942218               1 0.0077942218
## 141                    D.T.sinc  0.0072330260               1 0.0072330260
## 70                    D.T.great  0.0070063865               1 0.0070063865
## 121               D.T.protector  0.0057850197               1 0.0057850197
## 31                     D.T.case  0.0057562564               1 0.0057562564
## 145                 D.T.sticker  0.0042625126               1 0.0042625126
## 134                     D.T.see  0.0041433566               1 0.0041433566
## 168               D.npnct01.log  0.0041255300               0 0.0041255300
## 9                 D.P.spacegray  0.0034818565               1 0.0034818565
## 35                    D.T.clean  0.0025337892               1 0.0025337892
## 105                  D.T.normal  0.0019081337               1 0.0019081337
## 13                D.T.accessori  0.0007963083               1 0.0007963083
## 138                    D.T.show -0.0003156554               1 0.0003156554
## 69                     D.T.good -0.0004368629               1 0.0004368629
## 12                    D.T.X2016 -0.0005289068               1 0.0005289068
## 142                   D.T.small -0.0010738542               1 0.0010738542
## 5                     D.P.black -0.0012485463               1 0.0012485463
## 3                        .rnorm -0.0014350110               0 0.0014350110
## 165 D.TfIdf.sum.stem.stop.Ratio -0.0014568383               0 0.0014568383
## 16                      D.T.air -0.0029579942               1 0.0029579942
## 127                  D.T.retail -0.0042217335               1 0.0042217335
## 159                    D.T.wear -0.0048789708               1 0.0048789708
## 68                     D.T.geek -0.0064074827               1 0.0064074827
## 144                   D.T.squad -0.0064074827               1 0.0064074827
## 34                     D.T.chip -0.0067464224               1 0.0067464224
## 95                     D.T.mini -0.0075528886               1 0.0075528886
## 128                   D.T.right -0.0080547459               1 0.0080547459
## 129                 D.T.scratch -0.0088060862               1 0.0088060862
## 93                     D.T.mark -0.0088438689               1 0.0088438689
## 78                   D.T.includ -0.0091767476               1 0.0091767476
## 4                       D.P.air -0.0092629952               1 0.0092629952
## 88                   D.T.lightn -0.0099034064               1 0.0099034064
## 27                    D.T.brand -0.0103861855               1 0.0103861855
## 40                   D.T.corner -0.0104287544               1 0.0104287544
## 36                     D.T.come -0.0104488093               1 0.0104488093
## 8                      D.P.mini -0.0112418293               1 0.0112418293
## 224                storage.fctr -0.0116754969               0 0.0116754969
## 147                  D.T.stylus -0.0125154705               1 0.0125154705
## 82                     D.T.item -0.0134922662               1 0.0134922662
## 49                   D.T.detail -0.0139118798               1 0.0139118798
## 162                    D.T.work -0.0145594907               1 0.0145594907
## 45                    D.T.damag -0.0160662651               1 0.0160662651
## 14                    D.T.activ -0.0166891768               1 0.0166891768
## 81                     D.T.ipad -0.0177348755               1 0.0177348755
## 107                   D.T.order -0.0188854872               1 0.0188854872
## 178               D.npnct11.log -0.0192035548               0 0.0192035548
## 106                    D.T.open -0.0193952625               1 0.0193952625
## 23                    D.T.beetl -0.0215250231               1 0.0215250231
## 46                   D.T.defens -0.0215250231               1 0.0215250231
## 63                    D.T.final -0.0215250231               1 0.0215250231
## 73                   D.T.higher -0.0215250231               1 0.0215250231
## 99                    D.T.money -0.0215250231               1 0.0215250231
## 185               D.npnct18.log -0.0215250231               0 0.0215250231
## 123                    D.T.read -0.0226911615               1 0.0226911615
## 156                    D.T.unit -0.0231226779               1 0.0231226779
## 177               D.npnct10.log -0.0241015016               0 0.0241015016
## 87                    D.T.light -0.0249083615               1 0.0249083615
## 122                 D.T.qualiti -0.0252899986               1 0.0252899986
## 25                     D.T.bodi -0.0252978602               1 0.0252978602
## 19                     D.T.appl -0.0287074081               1 0.0287074081
## 24                  D.T.blemish -0.0291642840               1 0.0291642840
## 115                     D.T.pic -0.0292106355               1 0.0292106355
## 30                     D.T.cabl -0.0296202577               1 0.0296202577
## 111                 D.T.passcod -0.0303239925               1 0.0303239925
## 164       D.TfIdf.sum.post.stop -0.0303366192               0 0.0303366192
## 6                      D.P.gold -0.0304491748               1 0.0304491748
## 54                  D.T.display -0.0307725689               1 0.0307725689
## 77                     D.T.imei -0.0308865534               1 0.0308865534
## 1                    .clusterid -0.0318360231               1 0.0318360231
## 2               .clusterid.fctr -0.0318360231               0 0.0318360231
## 163       D.TfIdf.sum.post.stem -0.0323742743               0 0.0323742743
## 204                 D.sum.TfIdf -0.0323742743               0 0.0323742743
## 96                    D.T.minim -0.0330865270               1 0.0330865270
## 29                   D.T.button -0.0338831937               1 0.0338831937
## 161                    D.T.will -0.0350451461               1 0.0350451461
## 39                  D.T.contain -0.0355839439               1 0.0355839439
## 217          prdl.my.descr.fctr -0.0358461598               0 0.0358461598
## 103                     D.T.new -0.0372353149               1 0.0372353149
## 42                  D.T.correct -0.0373025158               1 0.0373025158
## 62                   D.T.featur -0.0373025158               1 0.0373025158
## 180               D.npnct13.log -0.0373463069               0 0.0373463069
## 32                  D.T.certifi -0.0385931627               1 0.0385931627
## 214                  color.fctr -0.0391372902               0 0.0391372902
## 72                  D.T.heavili -0.0391978700               1 0.0391978700
## 175               D.npnct08.log -0.0396513123               0 0.0396513123
## 218             prdline.my.fctr -0.0415814340               1 0.0415814340
## 37                   D.T.condit -0.0418798096               1 0.0418798096
## 149                    D.T.sync -0.0430848435               1 0.0430848435
## 152              D.T.technician -0.0430848435               1 0.0430848435
## 125               D.T.refurbish -0.0449149382               1 0.0449149382
## 183               D.npnct16.log -0.0449403962               0 0.0449403962
## 191               D.npnct24.log -0.0458449965               0 0.0458449965
## 198             D.nstopwrds.log -0.0474681704               0 0.0474681704
## 64                     D.T.free -0.0478266395               1 0.0478266395
## 137                    D.T.ship -0.0483492299               1 0.0483492299
## 97                    D.T.minor -0.0483597041               1 0.0483597041
## 160                    D.T.wifi -0.0499453504               1 0.0499453504
## 173               D.npnct06.log -0.0499761958               0 0.0499761958
## 50                    D.T.devic -0.0504727874               1 0.0504727874
## 112                 D.T.perfect -0.0504871511               1 0.0504871511
## 22                     D.T.bare -0.0509186819               1 0.0509186819
## 57                      D.T.esn -0.0517020813               1 0.0517020813
## 195               D.npnct28.log -0.0524583244               0 0.0524583244
## 108                  D.T.origin -0.0525252573               1 0.0525252573
## 113                   D.T.phone -0.0527118662               1 0.0527118662
## 199                 D.nuppr.log -0.0553358386               0 0.0553358386
## 20                   D.T.averag -0.0555976359               1 0.0555976359
## 89                     D.T.like -0.0557801451               1 0.0557801451
## 26                      D.T.box -0.0563485768               1 0.0563485768
## 166                 D.nchrs.log -0.0565357348               0 0.0565357348
## 15                   D.T.affect -0.0566799690               1 0.0566799690
## 79                  D.T.inspect -0.0568460093               1 0.0568460093
## 135                  D.T.seller -0.0584897212               1 0.0584897212
## 153                    D.T.test -0.0586208334               1 0.0586208334
## 200                 D.nwrds.log -0.0588147403               0 0.0588147403
## 179               D.npnct12.log -0.0593256462               0 0.0593256462
## 212                carrier.fctr -0.0599089237               0 0.0599089237
## 146                   D.T.stock -0.0607284075               1 0.0607284075
## 98                     D.T.mint -0.0610303678               1 0.0610303678
## 176               D.npnct09.log -0.0618253281               0 0.0618253281
## 109                  D.T.overal -0.0621057222               1 0.0621057222
## 167                 D.ndgts.log -0.0628684727               0 0.0628684727
## 201             D.nwrds.unq.log -0.0638431167               0 0.0638431167
## 206     D.terms.n.post.stem.log -0.0638431167               0 0.0638431167
## 208     D.terms.n.post.stop.log -0.0638651730               0 0.0638651730
## 150                     D.T.tab -0.0707242028               1 0.0707242028
## 120              D.T.profession -0.0712586605               1 0.0712586605
## 213               cellular.fctr -0.0743297381               0 0.0743297381
## 43                   D.T.cosmet -0.0777513602               1 0.0777513602
## 181               D.npnct14.log -0.0786203827               0 0.0786203827
## 205         D.terms.n.post.stem -0.0798677390               0 0.0798677390
## 207         D.terms.n.post.stop -0.0800729927               0 0.0800729927
## 61                     D.T.fair -0.0802848689               1 0.0802848689
## 90                     D.T.list -0.0870905528               1 0.0870905528
## 102                    D.T.near -0.0929819941               1 0.0929819941
## 11                     D.T.X100 -0.1150127028               1 0.1150127028
## 172               D.npnct05.log -0.1180558939               0 0.1180558939
## 74                     D.T.hous -0.1373919817               1 0.1373919817
## 215              condition.fctr -0.1535490071               0 0.1535490071
## 210                    UniqueID -0.1895466260               1 0.1895466260
## 216                    idseq.my -0.1895466260               0 0.1895466260
## 221             startprice.diff -0.2794223471               0 0.2794223471
## 223         startprice.predict. -0.3573141534               1 0.3573141534
## 220                  startprice -0.4569767211               1 0.4569767211
## 222              startprice.log -0.4674275376               1 0.4674275376
## 7                      D.P.http            NA               1           NA
## 60                   D.T.expect            NA               1           NA
## 80                    D.T.intro            NA               1           NA
## 169               D.npnct02.log            NA               0           NA
## 171               D.npnct04.log            NA               0           NA
## 184               D.npnct17.log            NA               0           NA
## 186               D.npnct19.log            NA               0           NA
## 187               D.npnct20.log            NA               0           NA
## 188               D.npnct21.log            NA               0           NA
## 189               D.npnct22.log            NA               0           NA
## 190               D.npnct23.log            NA               0           NA
## 192               D.npnct25.log            NA               0           NA
## 193               D.npnct26.log            NA               0           NA
## 194               D.npnct27.log            NA               0           NA
## 196               D.npnct29.log            NA               0           NA
## 197               D.npnct30.log            NA               0           NA
##                  cor.high.X   freqRatio percentUnique zeroVar   nzv
## 219                    <NA>    1.161628    0.10758472   FALSE FALSE
## 211                    <NA>    1.221027    0.10758472   FALSE FALSE
## 17                     <NA>  461.000000    0.43033889   FALSE  TRUE
## 48                     <NA>  203.444444    0.48413125   FALSE  TRUE
## 202     D.terms.n.post.stop   13.544304    4.19580420   FALSE FALSE
## 86                     <NA>  925.500000    0.37654653   FALSE  TRUE
## 124                    <NA>  264.142857    0.21516945   FALSE  TRUE
## 85                     <NA>  463.750000    0.10758472   FALSE  TRUE
## 71                     <NA>  264.571429    0.10758472   FALSE  TRUE
## 182                    <NA>  153.416667    0.16137708   FALSE  TRUE
## 118                    <NA>  263.857143    0.26896181   FALSE  TRUE
## 155                    <NA>  617.666667    0.21516945   FALSE  TRUE
## 154                    <NA>  368.200000    0.53792361   FALSE  TRUE
## 140                    <NA>  112.500000    0.59171598   FALSE  TRUE
## 67                     <NA>   71.080000    0.59171598   FALSE  TRUE
## 104                    <NA>  308.500000    0.21516945   FALSE  TRUE
## 28                     <NA>  263.285714    0.48413125   FALSE  TRUE
## 41                     <NA>  928.500000    0.10758472   FALSE  TRUE
## 101                    <NA> 1857.000000    0.16137708   FALSE  TRUE
## 131                    <NA>  618.333333    0.16137708   FALSE  TRUE
## 44                     <NA>  258.285714    0.80688542   FALSE  TRUE
## 76                     <NA> 1857.000000    0.16137708   FALSE  TRUE
## 33                     <NA>  305.166667    0.69930070   FALSE  TRUE
## 136                    <NA>  615.000000    0.48413125   FALSE  TRUE
## 21                     <NA>  138.846154    0.64550834   FALSE  TRUE
## 116                    <NA>  367.000000    0.48413125   FALSE  TRUE
## 59                     <NA>  149.666667    0.75309306   FALSE  TRUE
## 47                     <NA>  165.272727    0.53792361   FALSE  TRUE
## 51                     <NA>  368.800000    0.48413125   FALSE  TRUE
## 170                    <NA>   83.227273    0.16137708   FALSE  TRUE
## 53                     <NA> 1858.000000    0.10758472   FALSE  TRUE
## 56                     <NA> 1858.000000    0.10758472   FALSE  TRUE
## 58                     <NA> 1858.000000    0.10758472   FALSE  TRUE
## 126                    <NA> 1858.000000    0.10758472   FALSE  TRUE
## 148                    <NA> 1858.000000    0.10758472   FALSE  TRUE
## 174                    <NA> 1858.000000    0.10758472   FALSE  TRUE
## 66                     <NA>   99.777778    0.64550834   FALSE  TRUE
## 130                    <NA>   58.862069    0.80688542   FALSE  TRUE
## 94                     <NA>  262.571429    0.26896181   FALSE  TRUE
## 38                     <NA>  230.875000    0.26896181   FALSE  TRUE
## 83                     <NA>  307.000000    0.53792361   FALSE  TRUE
## 151                    <NA>  262.571429    0.43033889   FALSE  TRUE
## 10                     <NA>  231.250000    0.16137708   FALSE  TRUE
## 117                    <NA>  181.100000    0.59171598   FALSE  TRUE
## 209                    <NA>   71.600000    0.43033889   FALSE  TRUE
## 91                     <NA>  366.200000    0.69930070   FALSE  TRUE
## 18                     <NA>  618.666667    0.10758472   FALSE  TRUE
## 52                     <NA>  307.166667    0.43033889   FALSE  TRUE
## 65                     <NA>  616.000000    0.37654653   FALSE  TRUE
## 110                    <NA>  306.666667    0.37654653   FALSE  TRUE
## 55                     <NA>  617.666667    0.21516945   FALSE  TRUE
## 100                    <NA>  618.000000    0.21516945   FALSE  TRUE
## 114                    <NA>  614.000000    0.59171598   FALSE  TRUE
## 119                    <NA>  463.250000    0.21516945   FALSE  TRUE
## 133                    <NA>  927.000000    0.26896181   FALSE  TRUE
## 158                    <NA>   51.656250    0.96826251   FALSE  TRUE
## 132                    <NA>  228.250000    0.53792361   FALSE  TRUE
## 143                    <NA>  928.000000    0.16137708   FALSE  TRUE
## 203                    <NA>   63.000000   34.85745024   FALSE FALSE
## 139                    <NA>  927.500000    0.21516945   FALSE  TRUE
## 75                     <NA>  182.400000    0.59171598   FALSE  TRUE
## 84                     <NA> 1854.000000    0.32275417   FALSE  TRUE
## 157                    <NA> 1857.000000    0.16137708   FALSE  TRUE
## 92                     <NA>  463.000000    0.26896181   FALSE  TRUE
## 141                    <NA>  615.666667    0.37654653   FALSE  TRUE
## 70                     <NA>   98.666667    0.80688542   FALSE  TRUE
## 121                    <NA>  461.000000    0.37654653   FALSE  TRUE
## 31                     <NA>  105.352941    0.69930070   FALSE  TRUE
## 145                    <NA>  463.250000    0.16137708   FALSE  TRUE
## 134                    <NA>  260.571429    0.69930070   FALSE  TRUE
## 168                    <NA>   52.970588    0.32275417   FALSE  TRUE
## 9                      <NA>  463.750000    0.10758472   FALSE  TRUE
## 35                     <NA>  203.222222    0.64550834   FALSE  TRUE
## 105                    <NA>  305.500000    0.48413125   FALSE  TRUE
## 13                     <NA>  229.875000    0.43033889   FALSE  TRUE
## 138                    <NA>  113.937500    0.32275417   FALSE  TRUE
## 69                     <NA>   49.200000    0.86067778   FALSE  TRUE
## 12                     <NA> 1857.000000    0.16137708   FALSE  TRUE
## 142                    <NA>  261.428571    0.48413125   FALSE  TRUE
## 5                      <NA>  168.000000    0.10758472   FALSE  TRUE
## 3                      <NA>    1.000000  100.00000000   FALSE FALSE
## 165                    <NA>   65.176471   32.86713287   FALSE FALSE
## 16                     <NA>  460.750000    0.43033889   FALSE  TRUE
## 127                    <NA>  463.000000    0.26896181   FALSE  TRUE
## 159                    <NA>   99.500000    0.53792361   FALSE  TRUE
## 68                     <NA> 1856.000000    0.21516945   FALSE  TRUE
## 144                    <NA> 1856.000000    0.21516945   FALSE  TRUE
## 34                     <NA> 1855.000000    0.26896181   FALSE  TRUE
## 95                     <NA>  459.500000    0.59171598   FALSE  TRUE
## 128                    <NA>  616.000000    0.48413125   FALSE  TRUE
## 129                    <NA>   43.578947    0.86067778   FALSE  TRUE
## 93                     <NA>  167.181818    0.37654653   FALSE  TRUE
## 78                     <NA>  105.882353    0.69930070   FALSE  TRUE
## 4                      <NA>  122.866667    0.16137708   FALSE  TRUE
## 88                     <NA> 1856.000000    0.21516945   FALSE  TRUE
## 27                     <NA>  459.000000    0.69930070   FALSE  TRUE
## 40                     <NA>  305.500000    0.43033889   FALSE  TRUE
## 36                     <NA>  139.846154    0.64550834   FALSE  TRUE
## 8                      <NA>   91.900000    0.16137708   FALSE  TRUE
## 224                    <NA>    2.725146    0.26896181   FALSE FALSE
## 147                    <NA> 1857.000000    0.16137708   FALSE  TRUE
## 82                     <NA>   88.850000    0.64550834   FALSE  TRUE
## 49                     <NA> 1856.000000    0.21516945   FALSE  TRUE
## 162                    <NA>   71.583333    0.69930070   FALSE  TRUE
## 45                     <NA>  460.000000    0.48413125   FALSE  TRUE
## 14                     <NA>  927.500000    0.21516945   FALSE  TRUE
## 81                     <NA>   49.823529    0.96826251   FALSE  TRUE
## 107                    <NA>  461.250000    0.37654653   FALSE  TRUE
## 178                    <NA>    9.374269    0.37654653   FALSE FALSE
## 106                    <NA>  261.142857    0.53792361   FALSE  TRUE
## 23                     <NA> 1858.000000    0.10758472   FALSE  TRUE
## 46                     <NA> 1858.000000    0.10758472   FALSE  TRUE
## 63                     <NA> 1858.000000    0.10758472   FALSE  TRUE
## 73                     <NA> 1858.000000    0.10758472   FALSE  TRUE
## 99                     <NA> 1858.000000    0.10758472   FALSE  TRUE
## 185                    <NA> 1858.000000    0.10758472   FALSE  TRUE
## 123                    <NA>  459.500000    0.53792361   FALSE  TRUE
## 156                    <NA>  203.555556    0.48413125   FALSE  TRUE
## 177                    <NA>  308.666667    0.16137708   FALSE  TRUE
## 87                     <NA>   94.526316    0.64550834   FALSE  TRUE
## 122                    <NA>  463.500000    0.16137708   FALSE  TRUE
## 25                     <NA>  925.500000    0.32275417   FALSE  TRUE
## 19                     <NA>  226.750000    0.64550834   FALSE  TRUE
## 24                     <NA>  107.941176    0.37654653   FALSE  TRUE
## 115                    <NA> 1857.000000    0.16137708   FALSE  TRUE
## 30                     <NA>  230.625000    0.32275417   FALSE  TRUE
## 111                    <NA> 1857.000000    0.16137708   FALSE  TRUE
## 164   D.TfIdf.sum.post.stem   63.000000   34.26573427   FALSE FALSE
## 6                      <NA>  928.500000    0.10758472   FALSE  TRUE
## 54                     <NA>  184.300000    0.43033889   FALSE  TRUE
## 77                     <NA>  369.400000    0.32275417   FALSE  TRUE
## 1                      <NA>    6.886598    0.37654653   FALSE FALSE
## 2                      <NA>    6.886598    0.37654653   FALSE FALSE
## 163 D.ratio.nstopwrds.nwrds   63.000000   34.26573427   FALSE FALSE
## 204   D.TfIdf.sum.post.stem   63.000000   34.26573427   FALSE FALSE
## 96                     <NA>  166.909091    0.43033889   FALSE  TRUE
## 29                     <NA>  369.400000    0.32275417   FALSE  TRUE
## 161                    <NA>  367.000000    0.53792361   FALSE  TRUE
## 39                     <NA>  928.000000    0.16137708   FALSE  TRUE
## 217                    <NA>    1.271676    0.75309306   FALSE FALSE
## 103                    <NA>  109.437500    0.86067778   FALSE  TRUE
## 42                     <NA>  618.666667    0.10758472   FALSE  TRUE
## 62                     <NA>  618.666667    0.10758472   FALSE  TRUE
## 180     D.terms.n.post.stop    5.203065    0.32275417   FALSE FALSE
## 32                     <NA>  370.400000    0.21516945   FALSE  TRUE
## 214                    <NA>    1.544053    0.26896181   FALSE FALSE
## 72                     <NA>  308.666667    0.16137708   FALSE  TRUE
## 175                    <NA>   69.576923    0.21516945   FALSE  TRUE
## 218                    <NA>    1.135048    0.37654653   FALSE FALSE
## 37                     <NA>   25.233333    0.96826251   FALSE  TRUE
## 149                    <NA>  463.750000    0.10758472   FALSE  TRUE
## 152                    <NA>  463.750000    0.10758472   FALSE  TRUE
## 125                    <NA>  183.400000    0.43033889   FALSE  TRUE
## 183           D.npnct06.log   31.245614    0.16137708   FALSE  TRUE
## 191 D.ratio.nstopwrds.nwrds    1.356147    0.10758472   FALSE FALSE
## 198     D.terms.n.post.stop   13.000000    0.80688542   FALSE FALSE
## 64                     <NA>  263.142857    0.37654653   FALSE  TRUE
## 137                    <NA>  308.333333    0.21516945   FALSE  TRUE
## 97                     <NA>   84.142857    0.59171598   FALSE  TRUE
## 160                    <NA>  368.400000    0.43033889   FALSE  TRUE
## 173                    <NA>   33.735849    0.16137708   FALSE  TRUE
## 50                     <NA>   94.789474    0.64550834   FALSE  TRUE
## 112                    <NA>  226.625000    0.64550834   FALSE  TRUE
## 22                     <NA>  615.333333    0.43033889   FALSE  TRUE
## 57                     <NA>  461.500000    0.37654653   FALSE  TRUE
## 195                    <NA>  463.250000    0.16137708   FALSE  TRUE
## 108                    <NA>  227.875000    0.69930070   FALSE  TRUE
## 113                    <NA>  463.250000    0.16137708   FALSE  TRUE
## 199             D.nchrs.log   18.807018    4.41097364   FALSE FALSE
## 20                     <NA>  617.333333    0.26896181   FALSE  TRUE
## 89                     <NA>  258.571429    0.80688542   FALSE  TRUE
## 26                     <NA>   89.300000    0.80688542   FALSE  TRUE
## 166 D.terms.n.post.stop.log   15.970149    5.70199032   FALSE FALSE
## 15                     <NA>  131.428571    0.21516945   FALSE  TRUE
## 79                     <NA>  617.333333    0.21516945   FALSE  TRUE
## 135                    <NA>  369.600000    0.21516945   FALSE  TRUE
## 153                    <NA>  261.571429    0.43033889   FALSE  TRUE
## 200 D.terms.n.post.stop.log   12.738095    1.29101668   FALSE FALSE
## 179                    <NA>   27.246154    0.21516945   FALSE  TRUE
## 212           cellular.fctr    3.220290    0.37654653   FALSE FALSE
## 146                    <NA>  462.750000    0.21516945   FALSE  TRUE
## 98                     <NA>   78.782609    0.75309306   FALSE  TRUE
## 176                    <NA>  308.333333    0.21516945   FALSE  TRUE
## 109                    <NA>  263.857143    0.37654653   FALSE  TRUE
## 167                    <NA>   27.047619    0.69930070   FALSE  TRUE
## 201 D.terms.n.post.stop.log    8.568000    0.80688542   FALSE FALSE
## 206         D.nwrds.unq.log    8.568000    0.80688542   FALSE FALSE
## 208     D.terms.n.post.stop    9.232759    0.80688542   FALSE FALSE
## 150                    <NA>  204.666667    0.26896181   FALSE  TRUE
## 120                    <NA>  308.000000    0.21516945   FALSE  TRUE
## 213                    <NA>    2.116190    0.16137708   FALSE FALSE
## 43                     <NA>   49.361111    0.48413125   FALSE  TRUE
## 181                    <NA>   35.333333    0.26896181   FALSE  TRUE
## 205     D.terms.n.post.stop    8.568000    0.80688542   FALSE FALSE
## 207                    <NA>    9.232759    0.80688542   FALSE FALSE
## 61                     <NA>  369.000000    0.26896181   FALSE  TRUE
## 90                     <NA>  107.588235    0.26896181   FALSE  TRUE
## 102                    <NA>   91.600000    0.37654653   FALSE  TRUE
## 11                     <NA>   86.380952    0.48413125   FALSE  TRUE
## 172                    <NA>   40.311111    0.10758472   FALSE  TRUE
## 74                     <NA>  100.333333    0.43033889   FALSE  TRUE
## 215                    <NA>    4.003460    0.32275417   FALSE FALSE
## 210                    <NA>    1.000000  100.00000000   FALSE FALSE
## 216                    <NA>    1.000000  100.00000000   FALSE FALSE
## 221                    <NA>    1.000000  100.00000000   FALSE FALSE
## 223                    <NA>    1.000000  100.00000000   FALSE FALSE
## 220                    <NA>    2.807692   30.17751479   FALSE FALSE
## 222                    <NA>    2.807692   30.17751479   FALSE FALSE
## 7                      <NA>    0.000000    0.05379236    TRUE  TRUE
## 60                     <NA>    0.000000    0.05379236    TRUE  TRUE
## 80                     <NA>    0.000000    0.05379236    TRUE  TRUE
## 169                    <NA>    0.000000    0.05379236    TRUE  TRUE
## 171                    <NA>    0.000000    0.05379236    TRUE  TRUE
## 184                    <NA>    0.000000    0.05379236    TRUE  TRUE
## 186                    <NA>    0.000000    0.05379236    TRUE  TRUE
## 187                    <NA>    0.000000    0.05379236    TRUE  TRUE
## 188                    <NA>    0.000000    0.05379236    TRUE  TRUE
## 189                    <NA>    0.000000    0.05379236    TRUE  TRUE
## 190                    <NA>    0.000000    0.05379236    TRUE  TRUE
## 192                    <NA>    0.000000    0.05379236    TRUE  TRUE
## 193                    <NA>    0.000000    0.05379236    TRUE  TRUE
## 194                    <NA>    0.000000    0.05379236    TRUE  TRUE
## 196                    <NA>    0.000000    0.05379236    TRUE  TRUE
## 197                    <NA>    0.000000    0.05379236    TRUE  TRUE
##     myNearZV is.cor.y.abs.low
## 219    FALSE            FALSE
## 211    FALSE            FALSE
## 17     FALSE            FALSE
## 48     FALSE            FALSE
## 202    FALSE            FALSE
## 86      TRUE            FALSE
## 124    FALSE            FALSE
## 85     FALSE            FALSE
## 71     FALSE            FALSE
## 182    FALSE            FALSE
## 118    FALSE            FALSE
## 155     TRUE            FALSE
## 154    FALSE            FALSE
## 140    FALSE            FALSE
## 67     FALSE            FALSE
## 104    FALSE            FALSE
## 28     FALSE            FALSE
## 41      TRUE            FALSE
## 101     TRUE            FALSE
## 131     TRUE            FALSE
## 44     FALSE            FALSE
## 76      TRUE            FALSE
## 33     FALSE            FALSE
## 136     TRUE            FALSE
## 21     FALSE            FALSE
## 116    FALSE            FALSE
## 59     FALSE            FALSE
## 47     FALSE            FALSE
## 51     FALSE            FALSE
## 170    FALSE            FALSE
## 53      TRUE            FALSE
## 56      TRUE            FALSE
## 58      TRUE            FALSE
## 126     TRUE            FALSE
## 148     TRUE            FALSE
## 174     TRUE            FALSE
## 66     FALSE            FALSE
## 130    FALSE            FALSE
## 94     FALSE            FALSE
## 38     FALSE            FALSE
## 83     FALSE            FALSE
## 151    FALSE            FALSE
## 10     FALSE            FALSE
## 117    FALSE            FALSE
## 209    FALSE            FALSE
## 91     FALSE            FALSE
## 18      TRUE            FALSE
## 52     FALSE            FALSE
## 65      TRUE            FALSE
## 110    FALSE            FALSE
## 55      TRUE            FALSE
## 100     TRUE            FALSE
## 114     TRUE            FALSE
## 119    FALSE            FALSE
## 133     TRUE            FALSE
## 158    FALSE            FALSE
## 132    FALSE            FALSE
## 143     TRUE            FALSE
## 203    FALSE            FALSE
## 139     TRUE            FALSE
## 75     FALSE            FALSE
## 84      TRUE            FALSE
## 157     TRUE            FALSE
## 92     FALSE            FALSE
## 141     TRUE            FALSE
## 70     FALSE            FALSE
## 121    FALSE            FALSE
## 31     FALSE            FALSE
## 145    FALSE            FALSE
## 134    FALSE            FALSE
## 168    FALSE            FALSE
## 9      FALSE            FALSE
## 35     FALSE            FALSE
## 105    FALSE            FALSE
## 13     FALSE             TRUE
## 138    FALSE             TRUE
## 69     FALSE             TRUE
## 12      TRUE             TRUE
## 142    FALSE             TRUE
## 5      FALSE             TRUE
## 3      FALSE            FALSE
## 165    FALSE            FALSE
## 16     FALSE            FALSE
## 127    FALSE            FALSE
## 159    FALSE            FALSE
## 68      TRUE            FALSE
## 144     TRUE            FALSE
## 34      TRUE            FALSE
## 95     FALSE            FALSE
## 128     TRUE            FALSE
## 129    FALSE            FALSE
## 93     FALSE            FALSE
## 78     FALSE            FALSE
## 4      FALSE            FALSE
## 88      TRUE            FALSE
## 27     FALSE            FALSE
## 40     FALSE            FALSE
## 36     FALSE            FALSE
## 8      FALSE            FALSE
## 224    FALSE            FALSE
## 147     TRUE            FALSE
## 82     FALSE            FALSE
## 49      TRUE            FALSE
## 162    FALSE            FALSE
## 45     FALSE            FALSE
## 14      TRUE            FALSE
## 81     FALSE            FALSE
## 107    FALSE            FALSE
## 178    FALSE            FALSE
## 106    FALSE            FALSE
## 23      TRUE            FALSE
## 46      TRUE            FALSE
## 63      TRUE            FALSE
## 73      TRUE            FALSE
## 99      TRUE            FALSE
## 185     TRUE            FALSE
## 123    FALSE            FALSE
## 156    FALSE            FALSE
## 177    FALSE            FALSE
## 87     FALSE            FALSE
## 122    FALSE            FALSE
## 25      TRUE            FALSE
## 19     FALSE            FALSE
## 24     FALSE            FALSE
## 115     TRUE            FALSE
## 30     FALSE            FALSE
## 111     TRUE            FALSE
## 164    FALSE            FALSE
## 6       TRUE            FALSE
## 54     FALSE            FALSE
## 77     FALSE            FALSE
## 1      FALSE            FALSE
## 2      FALSE            FALSE
## 163    FALSE            FALSE
## 204    FALSE            FALSE
## 96     FALSE            FALSE
## 29     FALSE            FALSE
## 161    FALSE            FALSE
## 39      TRUE            FALSE
## 217    FALSE            FALSE
## 103    FALSE            FALSE
## 42      TRUE            FALSE
## 62      TRUE            FALSE
## 180    FALSE            FALSE
## 32     FALSE            FALSE
## 214    FALSE            FALSE
## 72     FALSE            FALSE
## 175    FALSE            FALSE
## 218    FALSE            FALSE
## 37     FALSE            FALSE
## 149    FALSE            FALSE
## 152    FALSE            FALSE
## 125    FALSE            FALSE
## 183    FALSE            FALSE
## 191    FALSE            FALSE
## 198    FALSE            FALSE
## 64     FALSE            FALSE
## 137    FALSE            FALSE
## 97     FALSE            FALSE
## 160    FALSE            FALSE
## 173    FALSE            FALSE
## 50     FALSE            FALSE
## 112    FALSE            FALSE
## 22      TRUE            FALSE
## 57     FALSE            FALSE
## 195    FALSE            FALSE
## 108    FALSE            FALSE
## 113    FALSE            FALSE
## 199    FALSE            FALSE
## 20      TRUE            FALSE
## 89     FALSE            FALSE
## 26     FALSE            FALSE
## 166    FALSE            FALSE
## 15     FALSE            FALSE
## 79      TRUE            FALSE
## 135    FALSE            FALSE
## 153    FALSE            FALSE
## 200    FALSE            FALSE
## 179    FALSE            FALSE
## 212    FALSE            FALSE
## 146    FALSE            FALSE
## 98     FALSE            FALSE
## 176    FALSE            FALSE
## 109    FALSE            FALSE
## 167    FALSE            FALSE
## 201    FALSE            FALSE
## 206    FALSE            FALSE
## 208    FALSE            FALSE
## 150    FALSE            FALSE
## 120    FALSE            FALSE
## 213    FALSE            FALSE
## 43     FALSE            FALSE
## 181    FALSE            FALSE
## 205    FALSE            FALSE
## 207    FALSE            FALSE
## 61     FALSE            FALSE
## 90     FALSE            FALSE
## 102    FALSE            FALSE
## 11     FALSE            FALSE
## 172    FALSE            FALSE
## 74     FALSE            FALSE
## 215    FALSE            FALSE
## 210    FALSE            FALSE
## 216    FALSE            FALSE
## 221    FALSE            FALSE
## 223    FALSE            FALSE
## 220    FALSE            FALSE
## 222    FALSE            FALSE
## 7       TRUE               NA
## 60      TRUE               NA
## 80      TRUE               NA
## 169     TRUE               NA
## 171     TRUE               NA
## 184     TRUE               NA
## 186     TRUE               NA
## 187     TRUE               NA
## 188     TRUE               NA
## 189     TRUE               NA
## 190     TRUE               NA
## 192     TRUE               NA
## 193     TRUE               NA
## 194     TRUE               NA
## 196     TRUE               NA
## 197     TRUE               NA
#subset(glb_feats_df, id %in% c("A.nuppr.log", "S.nuppr.log"))
# Visual check of the near-zero-variance flags: scatter percentUnique against
# freqRatio for every row of glb_feats_df, colored by myNearZV, with an extra
# point layer whose shape encodes the nzv flag.
# xlim(-5, 25) narrows the plotted range; rows falling outside it are dropped
# by ggplot2 (reported as "Removed ... rows" warnings when rendered).
print(
    myplot_scatter(
        glb_feats_df, "percentUnique", "freqRatio",
        colorcol_name = "myNearZV", jitter = TRUE
    ) +
        geom_point(aes(shape = nzv)) +
        xlim(-5, 25)
)
## Warning in myplot_scatter(glb_feats_df, "percentUnique", "freqRatio",
## colorcol_name = "myNearZV", : converting myNearZV to class:factor
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).

# List the feature-metadata rows flagged myNearZV == TRUE; which() skips rows
# where the flag is NA, and drop = FALSE keeps the result a data frame.
print(glb_feats_df[which(glb_feats_df$myNearZV), , drop = FALSE])
##                id         cor.y exclude.as.feat    cor.y.abs cor.high.X
## 86       D.T.left  0.0525031466               1 0.0525031466       <NA>
## 155       D.T.two  0.0447161329               1 0.0447161329       <NA>
## 41     D.T.corpor  0.0353706112               1 0.0353706112       <NA>
## 101      D.T.name  0.0352663148               1 0.0352663148       <NA>
## 131    D.T.scroll  0.0339028710               1 0.0339028710       <NA>
## 76       D.T.imag  0.0335537013               1 0.0335537013       <NA>
## 136     D.T.shape  0.0288373334               1 0.0288373334       <NA>
## 53   D.T.disclaim  0.0250040676               1 0.0250040676       <NA>
## 56        D.T.els  0.0250040676               1 0.0250040676       <NA>
## 58    D.T.essenti  0.0250040676               1 0.0250040676       <NA>
## 126   D.T.repeat.  0.0250040676               1 0.0250040676       <NA>
## 148     D.T.super  0.0250040676               1 0.0250040676       <NA>
## 174 D.npnct07.log  0.0250040676               0 0.0250040676       <NA>
## 18      D.T.anoth  0.0164535903               1 0.0164535903       <NA>
## 65       D.T.full  0.0149589509               1 0.0149589509       <NA>
## 55        D.T.edg  0.0114844118               1 0.0114844118       <NA>
## 100      D.T.must  0.0113915486               1 0.0113915486       <NA>
## 114     D.T.photo  0.0110333858               1 0.0110333858       <NA>
## 133      D.T.seal  0.0106898740               1 0.0106898740       <NA>
## 143   D.T.speaker  0.0096402551               1 0.0096402551       <NA>
## 139      D.T.side  0.0089049983               1 0.0089049983       <NA>
## 84   D.T.keyboard  0.0082735718               1 0.0082735718       <NA>
## 157     D.T.upper  0.0078374765               1 0.0078374765       <NA>
## 141      D.T.sinc  0.0072330260               1 0.0072330260       <NA>
## 12      D.T.X2016 -0.0005289068               1 0.0005289068       <NA>
## 68       D.T.geek -0.0064074827               1 0.0064074827       <NA>
## 144     D.T.squad -0.0064074827               1 0.0064074827       <NA>
## 34       D.T.chip -0.0067464224               1 0.0067464224       <NA>
## 128     D.T.right -0.0080547459               1 0.0080547459       <NA>
## 88     D.T.lightn -0.0099034064               1 0.0099034064       <NA>
## 147    D.T.stylus -0.0125154705               1 0.0125154705       <NA>
## 49     D.T.detail -0.0139118798               1 0.0139118798       <NA>
## 14      D.T.activ -0.0166891768               1 0.0166891768       <NA>
## 23      D.T.beetl -0.0215250231               1 0.0215250231       <NA>
## 46     D.T.defens -0.0215250231               1 0.0215250231       <NA>
## 63      D.T.final -0.0215250231               1 0.0215250231       <NA>
## 73     D.T.higher -0.0215250231               1 0.0215250231       <NA>
## 99      D.T.money -0.0215250231               1 0.0215250231       <NA>
## 185 D.npnct18.log -0.0215250231               0 0.0215250231       <NA>
## 25       D.T.bodi -0.0252978602               1 0.0252978602       <NA>
## 115       D.T.pic -0.0292106355               1 0.0292106355       <NA>
## 111   D.T.passcod -0.0303239925               1 0.0303239925       <NA>
## 6        D.P.gold -0.0304491748               1 0.0304491748       <NA>
## 39    D.T.contain -0.0355839439               1 0.0355839439       <NA>
## 42    D.T.correct -0.0373025158               1 0.0373025158       <NA>
## 62     D.T.featur -0.0373025158               1 0.0373025158       <NA>
## 22       D.T.bare -0.0509186819               1 0.0509186819       <NA>
## 20     D.T.averag -0.0555976359               1 0.0555976359       <NA>
## 79    D.T.inspect -0.0568460093               1 0.0568460093       <NA>
## 7        D.P.http            NA               1           NA       <NA>
## 60     D.T.expect            NA               1           NA       <NA>
## 80      D.T.intro            NA               1           NA       <NA>
## 169 D.npnct02.log            NA               0           NA       <NA>
## 171 D.npnct04.log            NA               0           NA       <NA>
## 184 D.npnct17.log            NA               0           NA       <NA>
## 186 D.npnct19.log            NA               0           NA       <NA>
## 187 D.npnct20.log            NA               0           NA       <NA>
## 188 D.npnct21.log            NA               0           NA       <NA>
## 189 D.npnct22.log            NA               0           NA       <NA>
## 190 D.npnct23.log            NA               0           NA       <NA>
## 192 D.npnct25.log            NA               0           NA       <NA>
## 193 D.npnct26.log            NA               0           NA       <NA>
## 194 D.npnct27.log            NA               0           NA       <NA>
## 196 D.npnct29.log            NA               0           NA       <NA>
## 197 D.npnct30.log            NA               0           NA       <NA>
##     freqRatio percentUnique zeroVar  nzv myNearZV is.cor.y.abs.low
## 86   925.5000    0.37654653   FALSE TRUE     TRUE            FALSE
## 155  617.6667    0.21516945   FALSE TRUE     TRUE            FALSE
## 41   928.5000    0.10758472   FALSE TRUE     TRUE            FALSE
## 101 1857.0000    0.16137708   FALSE TRUE     TRUE            FALSE
## 131  618.3333    0.16137708   FALSE TRUE     TRUE            FALSE
## 76  1857.0000    0.16137708   FALSE TRUE     TRUE            FALSE
## 136  615.0000    0.48413125   FALSE TRUE     TRUE            FALSE
## 53  1858.0000    0.10758472   FALSE TRUE     TRUE            FALSE
## 56  1858.0000    0.10758472   FALSE TRUE     TRUE            FALSE
## 58  1858.0000    0.10758472   FALSE TRUE     TRUE            FALSE
## 126 1858.0000    0.10758472   FALSE TRUE     TRUE            FALSE
## 148 1858.0000    0.10758472   FALSE TRUE     TRUE            FALSE
## 174 1858.0000    0.10758472   FALSE TRUE     TRUE            FALSE
## 18   618.6667    0.10758472   FALSE TRUE     TRUE            FALSE
## 65   616.0000    0.37654653   FALSE TRUE     TRUE            FALSE
## 55   617.6667    0.21516945   FALSE TRUE     TRUE            FALSE
## 100  618.0000    0.21516945   FALSE TRUE     TRUE            FALSE
## 114  614.0000    0.59171598   FALSE TRUE     TRUE            FALSE
## 133  927.0000    0.26896181   FALSE TRUE     TRUE            FALSE
## 143  928.0000    0.16137708   FALSE TRUE     TRUE            FALSE
## 139  927.5000    0.21516945   FALSE TRUE     TRUE            FALSE
## 84  1854.0000    0.32275417   FALSE TRUE     TRUE            FALSE
## 157 1857.0000    0.16137708   FALSE TRUE     TRUE            FALSE
## 141  615.6667    0.37654653   FALSE TRUE     TRUE            FALSE
## 12  1857.0000    0.16137708   FALSE TRUE     TRUE             TRUE
## 68  1856.0000    0.21516945   FALSE TRUE     TRUE            FALSE
## 144 1856.0000    0.21516945   FALSE TRUE     TRUE            FALSE
## 34  1855.0000    0.26896181   FALSE TRUE     TRUE            FALSE
## 128  616.0000    0.48413125   FALSE TRUE     TRUE            FALSE
## 88  1856.0000    0.21516945   FALSE TRUE     TRUE            FALSE
## 147 1857.0000    0.16137708   FALSE TRUE     TRUE            FALSE
## 49  1856.0000    0.21516945   FALSE TRUE     TRUE            FALSE
## 14   927.5000    0.21516945   FALSE TRUE     TRUE            FALSE
## 23  1858.0000    0.10758472   FALSE TRUE     TRUE            FALSE
## 46  1858.0000    0.10758472   FALSE TRUE     TRUE            FALSE
## 63  1858.0000    0.10758472   FALSE TRUE     TRUE            FALSE
## 73  1858.0000    0.10758472   FALSE TRUE     TRUE            FALSE
## 99  1858.0000    0.10758472   FALSE TRUE     TRUE            FALSE
## 185 1858.0000    0.10758472   FALSE TRUE     TRUE            FALSE
## 25   925.5000    0.32275417   FALSE TRUE     TRUE            FALSE
## 115 1857.0000    0.16137708   FALSE TRUE     TRUE            FALSE
## 111 1857.0000    0.16137708   FALSE TRUE     TRUE            FALSE
## 6    928.5000    0.10758472   FALSE TRUE     TRUE            FALSE
## 39   928.0000    0.16137708   FALSE TRUE     TRUE            FALSE
## 42   618.6667    0.10758472   FALSE TRUE     TRUE            FALSE
## 62   618.6667    0.10758472   FALSE TRUE     TRUE            FALSE
## 22   615.3333    0.43033889   FALSE TRUE     TRUE            FALSE
## 20   617.3333    0.26896181   FALSE TRUE     TRUE            FALSE
## 79   617.3333    0.21516945   FALSE TRUE     TRUE            FALSE
## 7      0.0000    0.05379236    TRUE TRUE     TRUE               NA
## 60     0.0000    0.05379236    TRUE TRUE     TRUE               NA
## 80     0.0000    0.05379236    TRUE TRUE     TRUE               NA
## 169    0.0000    0.05379236    TRUE TRUE     TRUE               NA
## 171    0.0000    0.05379236    TRUE TRUE     TRUE               NA
## 184    0.0000    0.05379236    TRUE TRUE     TRUE               NA
## 186    0.0000    0.05379236    TRUE TRUE     TRUE               NA
## 187    0.0000    0.05379236    TRUE TRUE     TRUE               NA
## 188    0.0000    0.05379236    TRUE TRUE     TRUE               NA
## 189    0.0000    0.05379236    TRUE TRUE     TRUE               NA
## 190    0.0000    0.05379236    TRUE TRUE     TRUE               NA
## 192    0.0000    0.05379236    TRUE TRUE     TRUE               NA
## 193    0.0000    0.05379236    TRUE TRUE     TRUE               NA
## 194    0.0000    0.05379236    TRUE TRUE     TRUE               NA
## 196    0.0000    0.05379236    TRUE TRUE     TRUE               NA
## 197    0.0000    0.05379236    TRUE TRUE     TRUE               NA
# Remove from the combined observations every column whose name is the id of a
# feature flagged myNearZV in glb_feats_df, then re-derive the train / test
# frames from the reduced data so all three stay in sync.
# drop = FALSE guarantees the result is still a data frame even if only a
# single column survives the filter (otherwise `[` would return a bare vector
# and the subset() calls below would fail).
glb_allobs_df <- glb_allobs_df[, setdiff(names(glb_allobs_df),
                                         subset(glb_feats_df, myNearZV)$id),
                               drop = FALSE]
# Rows are split on the .src marker column: "Train" vs. "Test".
glb_trnobs_df <- subset(glb_allobs_df, .src == "Train")
glb_newobs_df <- subset(glb_allobs_df, .src == "Test")

# Record interaction-only features in glb_feats_df$interaction.feat.
# glb_interaction_only_features is a named vector: its VALUES are matched
# against glb_feats_df$id and the corresponding NAMES are stored in the
# interaction.feat column of the matching rows. When it is NULL the column is
# simply set to NA for every feature.
if (!is.null(glb_interaction_only_features)) {
    # Align by feature id rather than by position: glb_feats_df rows are not
    # guaranteed to be in the same order as glb_interaction_only_features, and
    # some listed features may be absent from glb_feats_df. A positional
    # assignment would attach names to the wrong rows (or fail on a length
    # mismatch) in those cases.
    feat_pos <- match(glb_feats_df$id, glb_interaction_only_features)
    is_listed <- !is.na(feat_pos)
    glb_feats_df[is_listed, "interaction.feat"] <-
        names(glb_interaction_only_features)[feat_pos[is_listed]]
} else {
    glb_feats_df$interaction.feat <- NA
}

# Data-quality report on the combined observations after feature removal.
# As rendered below, the helper prints per-column counts of: missing numeric
# values, numeric zeros, Infs, NaNs, and missing string values.
# NOTE(review): terminate = TRUE presumably makes the helper stop the run on
# some class of problems; the run continues past the missing `sold` values
# reported below, so confirm the exact criteria against the helper definition.
mycheck_problem_data(glb_allobs_df, terminate = TRUE)
## [1] "numeric data missing in : "
##      sold sold.fctr 
##       798       798 
## [1] "numeric data w/ 0s in : "
##                biddable                    sold          startprice.log 
##                    1444                     999                      31 
##           cellular.fctr     D.terms.n.post.stop D.terms.n.post.stop.log 
##                    1600                    1521                    1521 
##   D.TfIdf.sum.post.stop     D.terms.n.post.stem D.terms.n.post.stem.log 
##                    1521                    1521                    1521 
##   D.TfIdf.sum.post.stem              D.T.condit                 D.T.use 
##                    1521                    2158                    2366 
##             D.T.scratch                 D.T.new                D.T.good 
##                    2371                    2501                    2460 
##              D.T.screen               D.T.great                D.T.ipad 
##                    2444                    2532                    2425 
##                D.T.work               D.T.excel                D.T.like 
##                    2459                    2557                    2584 
##                 D.T.box           D.T.function.                D.T.item 
##                    2547                    2541                    2528 
##               D.T.fulli              D.T.cosmet               D.T.minor 
##                    2569                    2540                    2540 
##                D.T.mint               D.T.crack                D.T.wear 
##                    2594                    2580                    2556 
##             D.T.perfect              D.T.includ                D.T.lock 
##                    2602                    2574                    2614 
##                D.T.case              D.T.icloud                 D.T.see 
##                    2575                    2601                    2604 
##               D.T.light               D.T.devic               D.T.pleas 
##                    2576                    2577                    2590 
##                D.T.back              D.T.origin                D.T.dent 
##                    2580                    2599                    2592 
##                D.T.hous                D.T.sign                D.T.open 
##                    2585                    2580                    2613 
##               D.T.clean                D.T.will                D.T.appl 
##                    2615                    2618                    2598 
##             D.T.charger               D.T.damag                D.T.X100 
##                    2619                    2626                    2593 
##                D.T.come               D.T.scuff              D.T.corner 
##                    2602                    2615                    2612 
##               D.T.small              D.T.broken            D.T.descript 
##                    2611                    2637                    2624 
##                D.T.unit           D.T.refurbish                D.T.show 
##                    2617                    2623                    2606 
##                D.T.read                D.T.test              D.T.pictur 
##                    2626                    2620                    2624 
##               D.T.brand                D.T.list                 D.T.may 
##                    2627                    2616                    2619 
##                D.T.mark             D.T.blemish              D.T.packag 
##                    2629                    2625                    2631 
##                D.T.mini              D.T.affect              D.T.normal 
##                    2623                    2629                    2626 
##                 D.T.tab                 D.T.top           D.T.accessori 
##                    2630                    2633                    2629 
##                D.T.ding                D.T.near               D.T.digit 
##                    2632                    2623                    2639 
##                D.T.tear             D.T.display               D.T.minim 
##                    2626                    2634                    2629 
##                D.T.wifi               D.T.order           D.T.protector 
##                    2632                    2636                    2639 
##                D.T.kept            D.T.previous              D.T.button 
##                    2637                    2634                    2638 
##               D.T.alway             D.T.contact                D.T.fair 
##                    2639                    2642                    2635 
##                 D.T.air                 D.T.esn                D.T.free 
##                    2636                    2641                    2638 
##                D.T.imei                D.T.cabl          D.T.profession 
##                    2640                    2639                    2641 
##              D.T.overal              D.T.retail               D.T.refer 
##                    2643                    2648                    2646 
##               D.T.stock              D.T.seller               D.T.phone 
##                    2643                    2643                    2647 
##             D.T.problem          D.T.manufactur             D.T.certifi 
##                    2651                    2649                    2647 
##                D.T.ship             D.T.heavili                 D.T.non 
##                    2646                    2646                    2649 
##             D.T.handset             D.T.sticker             D.T.qualiti 
##                    2650                    2649                    2651 
##               D.T.least          D.T.technician                D.T.sync 
##                    2653                    2652                    2652 
##             D.nwrds.log         D.nwrds.unq.log             D.sum.TfIdf 
##                    1520                    1521                    1521 
## D.ratio.sum.TfIdf.nwrds             D.nchrs.log             D.nuppr.log 
##                    1521                    1520                    1522 
##             D.ndgts.log           D.npnct01.log           D.npnct03.log 
##                    2427                    2579                    2614 
##           D.npnct05.log           D.npnct06.log           D.npnct08.log 
##                    2592                    2554                    2581 
##           D.npnct09.log           D.npnct10.log           D.npnct11.log 
##                    2641                    2648                    2301 
##           D.npnct12.log           D.npnct13.log           D.npnct14.log 
##                    2538                    1932                    2582 
##           D.npnct15.log           D.npnct16.log           D.npnct24.log 
##                    2637                    2546                    1520 
##           D.npnct28.log         D.nstopwrds.log                D.P.mini 
##                    2649                    1664                    2623 
##                 D.P.air               D.P.black               D.P.white 
##                    2636                    2640                    2647 
##           D.P.spacegray 
##                    2650 
## [1] "numeric data w/ Infs in : "
## named integer(0)
## [1] "numeric data w/ NaNs in : "
## named integer(0)
## [1] "string data missing in : "
## description   condition    cellular     carrier       color     storage 
##        1520           0           0           0           0           0 
## productline      .grpid  prdline.my    descr.my 
##           0          NA           0        1520
# glb_allobs_df %>% filter(is.na(Married.fctr)) %>% tbl_df()
# glb_allobs_df %>% count(Married.fctr)
# levels(glb_allobs_df$Married.fctr)

# Register the start of the "partition.data.training" step in the chunk
# timing table. Per the printed rows, this closes the previous chunk
# (end / elapsed filled in) and opens the new one with step_major advanced
# (major.inc=TRUE) and its end / elapsed still NA.
glb_chunks_df <- myadd_chunk(glb_chunks_df, "partition.data.training", major.inc=TRUE)
##                     label step_major step_minor    bgn    end elapsed
## 8         select.features          5          0 60.320 66.001   5.681
## 9 partition.data.training          6          0 66.001     NA      NA

Step 6.0: partition data training

# Split the training observations into a fit set (glb_fitobs_df) & an out-of-bag 
#   validation set (glb_OOBobs_df).
#   - Response entirely NA in glb_newobs_df: carve an OOB sample out of glb_trnobs_df
#       * glb_category_var is NULL: caTools::sample.split on the raw response
#       * otherwise: sampling::strata (simple random sampling w/o replacement) 
#           over the (raw response, category) combinations
#   - Otherwise: all of glb_trnobs_df is used for fitting & glb_newobs_df serves as OOB
if (all(is.na(glb_newobs_df[, glb_rsp_var]))) {
    
    set.seed(glb_split_sample.seed)
    
    # Target OOB size: 1.1 times the number of new observations
    OOB_size <- nrow(glb_newobs_df) * 1.1
    if (is.null(glb_category_var)) {
        require(caTools)
        # SplitRatio is OOB_size expressed as a fraction of the training rows;
        #   rows flagged TRUE become OOB, the remaining rows are used for fitting
        split <- sample.split(glb_trnobs_df[, glb_rsp_var_raw], 
                              SplitRatio=OOB_size / nrow(glb_trnobs_df))
        glb_OOBobs_df <- glb_trnobs_df[split ,]            
        glb_fitobs_df <- glb_trnobs_df[!split, ] 
    } else {
        sample_vars <- c(glb_rsp_var_raw, glb_category_var)
        # Counts (column .n) of each raw response value in training data, ordered by response
        #   (assumes mycreate_sqlxtab_df returns a frequency table - TODO confirm)
        rspvar_freq_df <- orderBy(reformulate(glb_rsp_var_raw), 
                                  mycreate_sqlxtab_df(glb_trnobs_df, glb_rsp_var_raw))
        # OOB_size apportioned across response values in proportion to their training counts
        OOB_rspvar_size <- 1.0 * OOB_size * rspvar_freq_df$.n / sum(rspvar_freq_df$.n) 
        # Category counts in new & training data, merged side by side (.n.Tst / .n.Train);
        #   a category missing on one side ends up with a count of 0
        newobs_freq_df <- orderBy(reformulate(glb_category_var),
                                  mycreate_sqlxtab_df(glb_newobs_df, glb_category_var))
        trnobs_freq_df <- orderBy(reformulate(glb_category_var),
                                  mycreate_sqlxtab_df(glb_trnobs_df, glb_category_var))
        allobs_freq_df <- merge(newobs_freq_df, trnobs_freq_df, by=glb_category_var,
                                all=TRUE, sort=TRUE, suffixes=c(".Tst", ".Train"))
        allobs_freq_df[is.na(allobs_freq_df)] <- 0
        # Stratum size = (category share in new data) x (per-response OOB size), rounded up.
        #   The product is a category-by-response matrix; as.vector() flattens it column-wise,
        #   i.e. all categories for the 1st response value, then all for the 2nd, ...
        OOB_strata_size <- ceiling(
            as.vector(matrix(allobs_freq_df$.n.Tst * 1.0 / sum(allobs_freq_df$.n.Tst)) %*%
                      matrix(OOB_rspvar_size, nrow=1)))
        # A stratum whose computed size is 0 still gets one observation
        OOB_strata_size[OOB_strata_size == 0] <- 1
        # Every (response, category) combination, sorted by response then category
        OOB_strata_df <- expand.grid(glb_rsp_var_raw=rspvar_freq_df[, glb_rsp_var_raw],
                                     glb_category_var=allobs_freq_df[, glb_category_var])
        names(OOB_strata_df) <- sample_vars
        OOB_strata_df <- orderBy(reformulate(sample_vars), OOB_strata_df)
        
        # Combinations actually observed in training data; after the all=TRUE merge with the
        #   full grid, combinations absent from training data have .n == NA
        trnobs_univ_df <- orderBy(reformulate(sample_vars),
                                       mycreate_sqlxtab_df(glb_trnobs_df, sample_vars))
        trnobs_univ_df <- merge(trnobs_univ_df, OOB_strata_df, all=TRUE)
        # Training data sorted by the stratification variables before it is handed to strata()
        tmp_trnobs_df <- orderBy(reformulate(c(glb_rsp_var_raw, glb_category_var)),
                                glb_trnobs_df)
        require(sampling)
        # Sizes are passed only for combinations present in training data.
        #   NOTE(review): relies on trnobs_univ_df rows being in the same order as
        #   OOB_strata_size, and on no stratum size exceeding its population - confirm
        split_strata <- strata(tmp_trnobs_df, 
                               stratanames=c(glb_rsp_var_raw, glb_category_var),
                               size=OOB_strata_size[!is.na(trnobs_univ_df$.n)],
                               method="srswor")
        # OOB = sampled rows restricted to the original columns; 
        #   fit = training rows whose id is not in the OOB sample
        glb_OOBobs_df <- getdata(tmp_trnobs_df, split_strata)[, names(glb_trnobs_df)]
        glb_fitobs_df <- glb_trnobs_df[!glb_trnobs_df[, glb_id_var] %in% 
                                        glb_OOBobs_df[, glb_id_var], ]
    }
} else {
    # New data already carries responses, so no OOB sample is drawn from training data
    print(sprintf("Newdata contains non-NA data for %s; setting OOB to Newdata", 
                  glb_rsp_var))
    glb_fitobs_df <- glb_trnobs_df; glb_OOBobs_df <- glb_newobs_df
}
## Loading required package: sampling
## 
## Attaching package: 'sampling'
## 
## The following objects are masked from 'package:survival':
## 
##     cluster, strata
## 
## The following object is masked from 'package:caret':
## 
##     cluster
# Cap the fit set at glb_max_fitobs (when set) by sub-sampling glb_fitobs_df on the
#   raw response; a warning records that the restriction was applied.
if (!is.null(glb_max_fitobs) && (nrow(glb_fitobs_df) > glb_max_fitobs)) {
    warning("glb_fitobs_df restricted to glb_max_fitobs: ", 
            format(glb_max_fitobs, big.mark=","))
    org_fitobs_df <- glb_fitobs_df
    # Namespace-qualified call: in the partition step caTools is attached only in the
    #   glb_category_var-is-NULL branch, so an unqualified sample.split() may not be 
    #   on the search path here.
    # SplitRatio is glb_max_fitobs as-is (a value > 1 is read by sample.split as a row count).
    split <- caTools::sample.split(org_fitobs_df[, glb_rsp_var_raw], 
                                   SplitRatio=glb_max_fitobs)
    glb_fitobs_df <- org_fitobs_df[split, ]
    # Release the full-size copy
    org_fitobs_df <- NULL
}

# Tag every row of glb_allobs_df & glb_trnobs_df with the partition its id landed in:
#   "Fit", "OOB", or "" when the id is in neither set (OOB is applied last)
glb_allobs_df$.lcn <- ""
glb_allobs_df$.lcn[glb_allobs_df[, glb_id_var] %in% glb_fitobs_df[, glb_id_var]] <- "Fit"
glb_allobs_df$.lcn[glb_allobs_df[, glb_id_var] %in% glb_OOBobs_df[, glb_id_var]] <- "OOB"

glb_trnobs_df$.lcn <- ""
glb_trnobs_df$.lcn[glb_trnobs_df[, glb_id_var] %in% glb_fitobs_df[, glb_id_var]] <- "Fit"
glb_trnobs_df$.lcn[glb_trnobs_df[, glb_id_var] %in% glb_OOBobs_df[, glb_id_var]] <- "OOB"

# Print the distribution of partition_var within each value of location_var:
#   first the cross-tab counts, then each row expressed as a share of its row total.
# Args:
#   obs_df:        data frame containing both variables
#   location_var:  name of the column whose values become the row labels (e.g. ".lcn")
#   partition_var: name of the column being cross-tabulated
# Returns:
#   the row-share table (the value of the final print call)
# Assumes mycreate_xtab_df returns one row per location_var value, with location_var
#   itself as one of the columns - TODO confirm against its definition.
dsp_class_dstrb <- function(obs_df, location_var, partition_var) {
    xtab_df <- mycreate_xtab_df(obs_df, c(location_var, partition_var))
    rownames(xtab_df) <- xtab_df[, location_var]
    # Drop the label column by exact name. Negating grepl()'s logical result yields 
    #   0 / -1 indices, which removes the *first* column wherever the match is (and 
    #   selects no columns at all when nothing matches); it also treated location_var 
    #   as a regex, so the "." in ".lcn" matched any character. 
    # drop=FALSE keeps a data frame even when a single count column remains.
    xtab_df <- xtab_df[, names(xtab_df) != location_var, drop=FALSE]
    print(xtab_df)
    print(xtab_df / rowSums(xtab_df, na.rm=TRUE))    
}    

# Sanity check of the split when a category variable is in use: 
#   compare the response mix across partitions & the category mix of OOB vs. new (Test) rows
if (!is.null(glb_category_var)) {
    if (glb_is_classification) {
        dsp_class_dstrb(glb_allobs_df, ".lcn", glb_rsp_var_raw)
    }

    # Category counts among the Test rows & among the OOB rows, joined by category
    newobs_ctgry_df <- 
        mycreate_sqlxtab_df(subset(glb_allobs_df, .src == "Test"), glb_category_var)
    OOBobs_ctgry_df <- 
        mycreate_sqlxtab_df(subset(glb_allobs_df, .lcn == "OOB"), glb_category_var)
    glb_ctgry_df <- merge(newobs_ctgry_df, OOBobs_ctgry_df, 
                          by=glb_category_var, all=TRUE, suffixes=c(".Tst", ".OOB"))

    # Counts converted to each sample's share, ignoring categories missing on one side
    glb_ctgry_df[, ".freqRatio.Tst"] <- 
        glb_ctgry_df[, ".n.Tst"] / sum(glb_ctgry_df[, ".n.Tst"], na.rm=TRUE)
    glb_ctgry_df[, ".freqRatio.OOB"] <- 
        glb_ctgry_df[, ".n.OOB"] / sum(glb_ctgry_df[, ".n.OOB"], na.rm=TRUE)

    # Largest Test share first
    print(orderBy(~-.freqRatio.Tst-.freqRatio.OOB, glb_ctgry_df))
}
##     sold.0 sold.1 sold.NA
##         NA     NA     798
## Fit    522    447      NA
## OOB    477    413      NA
##        sold.0    sold.1 sold.NA
##            NA        NA       1
## Fit 0.5386997 0.4613003      NA
## OOB 0.5359551 0.4640449      NA
##    prdl.my.descr.fctr .n.Tst .n.OOB .freqRatio.Tst .freqRatio.OOB
## 9           iPadAir#0     88     98     0.11027569     0.11011236
## 5            iPad 2#0     83     93     0.10401003     0.10449438
## 6            iPad 2#1     71     79     0.08897243     0.08876404
## 13         iPadmini#0     65     73     0.08145363     0.08202247
## 8           iPad 3+#1     64     71     0.08020050     0.07977528
## 11      iPadmini 2+#0     64     71     0.08020050     0.07977528
## 7           iPad 3+#0     59     66     0.07393484     0.07415730
## 10          iPadAir#1     49     54     0.06140351     0.06067416
## 14         iPadmini#1     49     54     0.06140351     0.06067416
## 3            iPad 1#0     46     52     0.05764411     0.05842697
## 1           Unknown#0     45     50     0.05639098     0.05617978
## 4            iPad 1#1     43     48     0.05388471     0.05393258
## 2           Unknown#1     42     47     0.05263158     0.05280899
## 12      iPadmini 2+#1     30     34     0.03759398     0.03820225
# Run this line by line
print("glb_feats_df:");   print(dim(glb_feats_df))
## [1] "glb_feats_df:"
## [1] 224  12
# Snapshot of the feature catalogue; re-running the second line restores it when this
#   chunk is stepped through interactively
sav_feats_df <- glb_feats_df
glb_feats_df <- sav_feats_df

# Flag the raw response row
glb_feats_df$rsp_var_raw <- FALSE
glb_feats_df[glb_feats_df$id == glb_rsp_var_raw, "rsp_var_raw"] <- TRUE
# exclude.as.feat recoded as logical
glb_feats_df[, "exclude.as.feat"] <- (glb_feats_df[, "exclude.as.feat"] == 1)
# Flag the id row(s), unless there is no id variable or ids are the row names
if (!(is.null(glb_id_var) || glb_id_var == ".rownames")) {
    glb_feats_df[is.element(glb_feats_df$id, glb_id_var), "id_var"] <- TRUE
}
# Catalogue row for the derived response variable: never a feature, marked as rsp_var
add_feats_df <- data.frame(id=glb_rsp_var, exclude.as.feat=TRUE, rsp_var=TRUE)
row.names(add_feats_df) <- add_feats_df$id
print(add_feats_df)
##                  id exclude.as.feat rsp_var
## sold.fctr sold.fctr            TRUE    TRUE
# Append the derived-response row to the feature catalogue, then display the special rows
#   (raw response, derived response and - when an id column is in use - the id variable)
glb_feats_df <- myrbind_df(glb_feats_df, add_feats_df)
# NULL-safe test: `NULL != ".rownames"` is logical(0), which makes a bare `if` fail.
#   Mirrors the guard used where the id_var flag is set; without an id variable the
#   id_var column may not exist, so it is left out of the filter in that case.
if (!is.null(glb_id_var) && glb_id_var != ".rownames") {
    print(subset(glb_feats_df, rsp_var_raw | rsp_var | id_var))
} else {
    print(subset(glb_feats_df, rsp_var_raw | rsp_var))
}
##                  id      cor.y exclude.as.feat cor.y.abs cor.high.X
## 219            sold  1.0000000            TRUE 1.0000000       <NA>
## 210        UniqueID -0.1895466            TRUE 0.1895466       <NA>
## sold.fctr sold.fctr         NA            TRUE        NA       <NA>
##           freqRatio percentUnique zeroVar   nzv myNearZV is.cor.y.abs.low
## 219        1.161628     0.1075847   FALSE FALSE    FALSE            FALSE
## 210        1.000000   100.0000000   FALSE FALSE    FALSE            FALSE
## sold.fctr        NA            NA      NA    NA       NA               NA
##           interaction.feat rsp_var_raw id_var rsp_var
## 219                   <NA>        TRUE     NA      NA
## 210                   <NA>       FALSE   TRUE      NA
## sold.fctr             <NA>          NA     NA    TRUE
# List the feature ids catalogued in glb_feats_df that have no matching column in glb_allobs_df
print("glb_feats_df vs. glb_allobs_df: "); 
## [1] "glb_feats_df vs. glb_allobs_df: "
print(setdiff(glb_feats_df$id, names(glb_allobs_df)))
##  [1] "D.T.left"      "D.T.two"       "D.T.corpor"    "D.T.name"     
##  [5] "D.T.scroll"    "D.T.imag"      "D.T.shape"     "D.T.disclaim" 
##  [9] "D.T.els"       "D.T.essenti"   "D.T.repeat."   "D.T.super"    
## [13] "D.npnct07.log" "D.T.anoth"     "D.T.full"      "D.T.edg"      
## [17] "D.T.must"      "D.T.photo"     "D.T.seal"      "D.T.speaker"  
## [21] "D.T.side"      "D.T.keyboard"  "D.T.upper"     "D.T.sinc"     
## [25] "D.T.X2016"     "D.T.geek"      "D.T.squad"     "D.T.chip"     
## [29] "D.T.right"     "D.T.lightn"    "D.T.stylus"    "D.T.detail"   
## [33] "D.T.activ"     "D.T.beetl"     "D.T.defens"    "D.T.final"    
## [37] "D.T.higher"    "D.T.money"     "D.npnct18.log" "D.T.bodi"     
## [41] "D.T.pic"       "D.T.passcod"   "D.P.gold"      "D.T.contain"  
## [45] "D.T.correct"   "D.T.featur"    "D.T.bare"      "D.T.averag"   
## [49] "D.T.inspect"   "D.P.http"      "D.T.expect"    "D.T.intro"    
## [53] "D.npnct02.log" "D.npnct04.log" "D.npnct17.log" "D.npnct19.log"
## [57] "D.npnct20.log" "D.npnct21.log" "D.npnct22.log" "D.npnct23.log"
## [61] "D.npnct25.log" "D.npnct26.log" "D.npnct27.log" "D.npnct29.log"
## [65] "D.npnct30.log"
# Reverse check: columns of glb_allobs_df that are not catalogued in glb_feats_df
print("glb_allobs_df vs. glb_feats_df: "); 
## [1] "glb_allobs_df vs. glb_feats_df: "
# Ensure these are only chr vars
#   The columns reported by myfind_chr_cols_df are removed from the uncatalogued set, 
#   so an empty result (character(0)) means nothing else is missing from the catalogue
print(setdiff(setdiff(names(glb_allobs_df), glb_feats_df$id), 
                myfind_chr_cols_df(glb_allobs_df)))
## character(0)
#print(setdiff(setdiff(names(glb_allobs_df), glb_exclude_vars_as_features), 
#                glb_feats_df$id))

# Report the dimensions (rows, columns) of each working data frame.
#   glb_allobs_df & glb_trnobs_df show one more column than the fit / OOB / new frames;
#   presumably the .lcn tag, which is added to those two frames only - TODO confirm
print("glb_allobs_df: "); print(dim(glb_allobs_df))
## [1] "glb_allobs_df: "
## [1] 2657  172
print("glb_trnobs_df: "); print(dim(glb_trnobs_df))
## [1] "glb_trnobs_df: "
## [1] 1859  172
print("glb_fitobs_df: "); print(dim(glb_fitobs_df))
## [1] "glb_fitobs_df: "
## [1] 969 171
print("glb_OOBobs_df: "); print(dim(glb_OOBobs_df))
## [1] "glb_OOBobs_df: "
## [1] 890 171
print("glb_newobs_df: "); print(dim(glb_newobs_df))
## [1] "glb_newobs_df: "
## [1] 798 171
# # Does not handle NULL or length(glb_id_var) > 1

# When saving is enabled, write the feature catalogue & the combined observations to
#   <glb_out_pfx>blddfs_dsk.RData; the partitioned frames (glb_trnobs_df, glb_fitobs_df,
#   glb_OOBobs_df, glb_newobs_df) are currently not part of the saved set
if (glb_save_envir) {
    save(list=c("glb_feats_df", "glb_allobs_df"),
         file=paste0(glb_out_pfx, "blddfs_dsk.RData"))
}
# load(paste0(glb_out_pfx, "blddfs_dsk.RData"))

# if (!all.equal(tmp_feats_df, glb_feats_df))
#     stop("glb_feats_df r/w not working")
# if (!all.equal(tmp_entity_df, glb_allobs_df))
#     stop("glb_allobs_df r/w not working")

# Remove the sampling mask if one was created. `split` exists only when a sample.split()
#   path ran, so an unconditional rm() warns "object 'split' not found" otherwise.
#   inherits=FALSE stops exists() from finding base::split further up the search path.
if (exists("split", inherits=FALSE)) rm(split)
## Warning in rm(split): object 'split' not found
# Log the start of the "fit.models" step in the chunk timing table; per the table printed 
#   below, the partition step's end / elapsed get filled in and step_major moves from 6 to 7
glb_chunks_df <- myadd_chunk(glb_chunks_df, "fit.models", major.inc=TRUE)
##                      label step_major step_minor    bgn    end elapsed
## 9  partition.data.training          6          0 66.001 67.215   1.214
## 10              fit.models          7          0 67.216     NA      NA

Step 7.0: fit models

# load(paste0(glb_out_pfx, "dsk.RData"))

# Binomial classification requires at least two distinct response values in the fit data;
#   abort with the values found otherwise
if (glb_is_classification && glb_is_binomial) {
    if (length(unique(glb_fitobs_df[, glb_rsp_var])) < 2) {
        stop("glb_fitobs_df$", glb_rsp_var, ": contains less than 2 unique values: ",
             paste(unique(glb_fitobs_df[, glb_rsp_var]), collapse=", "))
    }
}

# Ids of the two features with the largest cor.y.abs among those that are not excluded,
#   not flagged is.cor.y.abs.low and have no cor.high.X entry
max_cor_y_x_vars <- 
    orderBy(~ -cor.y.abs, 
            subset(glb_feats_df, 
                   is.na(cor.high.X) & !is.cor.y.abs.low & (exclude.as.feat == 0))
            )[seq_len(2), "id"]
# while(length(max_cor_y_x_vars) < 2) {
#     max_cor_y_x_vars <- c(max_cor_y_x_vars, orderBy(~ -cor.y.abs, 
#             subset(glb_feats_df, (exclude.as.feat == 0) & !is.cor.y.abs.low))[3, "id"])    
# }
# When a baseline feature is nominated, abort if the top-ranked feature is a different
#   one whose cor.y.abs exceeds the baseline feature's
if (!is.null(glb_Baseline_mdl_var)) {
    if ((glb_Baseline_mdl_var != max_cor_y_x_vars[1]) & 
        (glb_feats_df[glb_feats_df$id == glb_Baseline_mdl_var, "cor.y.abs"] < 
         glb_feats_df[glb_feats_df$id == max_cor_y_x_vars[1], "cor.y.abs"])) {
        stop(max_cor_y_x_vars[1], " has a higher correlation with ", glb_rsp_var, 
             " than the Baseline var: ", glb_Baseline_mdl_var)
    }
}

# Problem-type label passed to the model fits that follow
glb_model_type <- ifelse(glb_is_regression, "regression", "classification")

# Baseline
#   fitted only when a baseline feature has been nominated
if (!is.null(glb_Baseline_mdl_var)) {
    ret_lst <- myfit_mdl(
        model_id="Baseline", 
        model_method="mybaseln_classfr",
        rsp_var=glb_rsp_var, 
        rsp_var_out=glb_rsp_var_out,
        indep_vars_vctr=glb_Baseline_mdl_var,
        fit_df=glb_fitobs_df, 
        OOB_df=glb_OOBobs_df)
}

# "MFO" (Most Frequent Outcome) model; for regression this is mean(y), fitted with lm
#   caret's nullModel is not used because its model stats are not available
#   rpart cannot be used for multinomial classification since it predicts non-MFO
ret_lst <- myfit_mdl(
    model_id="MFO", 
    model_type=glb_model_type,
    model_method=ifelse(glb_is_regression, "lm", "myMFO_classfr"), 
    rsp_var=glb_rsp_var, 
    rsp_var_out=glb_rsp_var_out,
    indep_vars_vctr=".rnorm",
    fit_df=glb_fitobs_df, 
    OOB_df=glb_OOBobs_df)
## [1] "fitting model: MFO.myMFO_classfr"
## [1] "    indep_vars: .rnorm"
## Fitting parameter = none on full training set
## [1] "in MFO.Classifier$fit"
## [1] "unique.vals:"
## [1] N Y
## Levels: N Y
## [1] "unique.prob:"
## y
##         N         Y 
## 0.5386997 0.4613003 
## [1] "MFO.val:"
## [1] "N"
##             Length Class      Mode     
## unique.vals 2      factor     numeric  
## unique.prob 2      -none-     numeric  
## MFO.val     1      -none-     character
## x.names     1      -none-     character
## xNames      1      -none-     character
## problemType 1      -none-     character
## tuneValue   1      data.frame list     
## obsLevels   2      -none-     character
## [1] "    calling mypredict_mdl for fit:"
## Loading required package: ROCR
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## 
## The following object is masked from 'package:stats':
## 
##     lowess
## [1] "in MFO.Classifier$prob"
##           N         Y
## 1 0.5386997 0.4613003
## 2 0.5386997 0.4613003
## 3 0.5386997 0.4613003
## 4 0.5386997 0.4613003
## 5 0.5386997 0.4613003
## 6 0.5386997 0.4613003
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.MFO.myMFO_classfr.N
## 1         N                                   522
## 2         Y                                   447
##          Prediction
## Reference   N   Y
##         N 522   0
##         Y 447   0
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   5.386997e-01   0.000000e+00   5.067192e-01   5.704443e-01   5.386997e-01 
## AccuracyPValue  McnemarPValue 
##   5.131824e-01   8.826336e-99 
## [1] "    calling mypredict_mdl for OOB:"
## [1] "in MFO.Classifier$prob"
##           N         Y
## 1 0.5386997 0.4613003
## 2 0.5386997 0.4613003
## 3 0.5386997 0.4613003
## 4 0.5386997 0.4613003
## 5 0.5386997 0.4613003
## 6 0.5386997 0.4613003
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.OOB"
##   sold.fctr sold.fctr.predict.MFO.myMFO_classfr.N
## 1         N                                   477
## 2         Y                                   413
##          Prediction
## Reference   N   Y
##         N 477   0
##         Y 413   0
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   5.359551e-01   0.000000e+00   5.025561e-01   5.691153e-01   5.359551e-01 
## AccuracyPValue  McnemarPValue 
##   5.137245e-01   2.217817e-91 
##            model_id  model_method  feats max.nTuningRuns
## 1 MFO.myMFO_classfr myMFO_classfr .rnorm               0
##   min.elapsedtime.everything min.elapsedtime.final max.auc.fit
## 1                      0.367                 0.003         0.5
##   opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1                    0.5               0        0.5386997
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1             0.5067192             0.5704443             0         0.5
##   opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1                    0.5               0        0.5359551
##   max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1             0.5025561             0.5691153             0
# "Random" model - classification only; 
#   regression does not need one since it would be the same as MFO
if (glb_is_classification) {
    ret_lst <- myfit_mdl(
        model_id="Random", 
        model_type=glb_model_type,
        model_method="myrandom_classfr",
        rsp_var=glb_rsp_var, 
        rsp_var_out=glb_rsp_var_out,
        indep_vars_vctr=".rnorm",
        fit_df=glb_fitobs_df, 
        OOB_df=glb_OOBobs_df)
}
## [1] "fitting model: Random.myrandom_classfr"
## [1] "    indep_vars: .rnorm"
## Fitting parameter = none on full training set
##             Length Class      Mode     
## unique.vals 2      factor     numeric  
## unique.prob 2      table      numeric  
## xNames      1      -none-     character
## problemType 1      -none-     character
## tuneValue   1      data.frame list     
## obsLevels   2      -none-     character
## [1] "    calling mypredict_mdl for fit:"
## [1] "in Random.Classifier$prob"

##    threshold   f.score
## 1        0.0 0.6313559
## 2        0.1 0.6313559
## 3        0.2 0.6313559
## 4        0.3 0.6313559
## 5        0.4 0.6313559
## 6        0.5 0.4611973
## 7        0.6 0.0000000
## 8        0.7 0.0000000
## 9        0.8 0.0000000
## 10       0.9 0.0000000
## 11       1.0 0.0000000

## [1] "Classifier Probability Threshold: 0.4000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.Random.myrandom_classfr.Y
## 1         N                                         522
## 2         Y                                         447
##          Prediction
## Reference   N   Y
##         N   0 522
##         Y   0 447
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   4.613003e-01   0.000000e+00   4.295557e-01   4.932808e-01   5.386997e-01 
## AccuracyPValue  McnemarPValue 
##   9.999994e-01  4.227904e-115 
## [1] "    calling mypredict_mdl for OOB:"
## [1] "in Random.Classifier$prob"

##    threshold   f.score
## 1        0.0 0.6339217
## 2        0.1 0.6339217
## 3        0.2 0.6339217
## 4        0.3 0.6339217
## 5        0.4 0.6339217
## 6        0.5 0.4817518
## 7        0.6 0.0000000
## 8        0.7 0.0000000
## 9        0.8 0.0000000
## 10       0.9 0.0000000
## 11       1.0 0.0000000

## [1] "Classifier Probability Threshold: 0.4000 to maximize f.score.OOB"
##   sold.fctr sold.fctr.predict.Random.myrandom_classfr.Y
## 1         N                                         477
## 2         Y                                         413
##          Prediction
## Reference   N   Y
##         N   0 477
##         Y   0 413
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   4.640449e-01   0.000000e+00   4.308847e-01   4.974439e-01   5.359551e-01 
## AccuracyPValue  McnemarPValue 
##   9.999925e-01  2.613895e-105 
##                  model_id     model_method  feats max.nTuningRuns
## 1 Random.myrandom_classfr myrandom_classfr .rnorm               0
##   min.elapsedtime.everything min.elapsedtime.final max.auc.fit
## 1                      0.257                 0.002   0.4960722
##   opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1                    0.4       0.6313559        0.4613003
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1             0.4295557             0.4932808             0   0.5185354
##   opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1                    0.4       0.6339217        0.4640449
##   max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1             0.4308847             0.4974439             0
# Any model that has tuning parameters gets "better" results with cross-validation
#   (except rf) & "different" results for different outcome metrics

# Max.cor.Y: rpart on the features selected in max_cor_y_x_vars
#   Used to check the impact of cross-validation
#       rpart is not a good candidate since caret does not optimize cp 
#       (the only tuning parameter of rpart) well
ret_lst <- myfit_mdl(
    model_id="Max.cor.Y.cv.0", 
    model_type=glb_model_type,
    model_method="rpart",
    rsp_var=glb_rsp_var, 
    rsp_var_out=glb_rsp_var_out,
    indep_vars_vctr=max_cor_y_x_vars,
    fit_df=glb_fitobs_df, 
    OOB_df=glb_OOBobs_df)
## [1] "fitting model: Max.cor.Y.cv.0.rpart"
## [1] "    indep_vars: biddable, startprice.diff"
## Loading required package: rpart
## Fitting cp = 0.528 on full training set
## Loading required package: rpart.plot

## Call:
## rpart(formula = .outcome ~ ., control = list(minsplit = 20, minbucket = 7, 
##     cp = 0, maxcompete = 4, maxsurrogate = 5, usesurrogate = 2, 
##     surrogatestyle = 0, maxdepth = 30, xval = 0))
##   n= 969 
## 
##          CP nsplit rel error
## 1 0.5279642      0         1
## 
## Node number 1: 969 observations
##   predicted class=N  expected loss=0.4613003  P(node) =1
##     class counts:   522   447
##    probabilities: 0.539 0.461 
## 
## n= 969 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 969 447 N (0.5386997 0.4613003) *
## [1] "    calling mypredict_mdl for fit:"
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.Max.cor.Y.cv.0.rpart.N
## 1         N                                      522
## 2         Y                                      447
##          Prediction
## Reference   N   Y
##         N 522   0
##         Y 447   0
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   5.386997e-01   0.000000e+00   5.067192e-01   5.704443e-01   5.386997e-01 
## AccuracyPValue  McnemarPValue 
##   5.131824e-01   8.826336e-99 
## [1] "    calling mypredict_mdl for OOB:"
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.OOB"
##   sold.fctr sold.fctr.predict.Max.cor.Y.cv.0.rpart.N
## 1         N                                      477
## 2         Y                                      413
##          Prediction
## Reference   N   Y
##         N 477   0
##         Y 413   0
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   5.359551e-01   0.000000e+00   5.025561e-01   5.691153e-01   5.359551e-01 
## AccuracyPValue  McnemarPValue 
##   5.137245e-01   2.217817e-91 
##               model_id model_method                     feats
## 1 Max.cor.Y.cv.0.rpart        rpart biddable, startprice.diff
##   max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1               0                      0.598                 0.012
##   max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1         0.5                    0.5               0        0.5386997
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1             0.5067192             0.5704443             0         0.5
##   opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1                    0.5               0        0.5359551
##   max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1             0.5025561             0.5691153             0
# Max.cor.Y rpart again, with no cross-validation folds & a one-point tuning grid 
#   that pins cp to 0 (min = max = 0)
ret_lst <- myfit_mdl(
    model_id="Max.cor.Y.cv.0.cp.0", 
    model_type=glb_model_type,
    model_method="rpart",
    rsp_var=glb_rsp_var, 
    rsp_var_out=glb_rsp_var_out,
    indep_vars_vctr=max_cor_y_x_vars,
    fit_df=glb_fitobs_df, 
    OOB_df=glb_OOBobs_df,
    n_cv_folds=0, 
    tune_models_df=data.frame(parameter="cp", min=0.0, max=0.0, by=0.1))
## [1] "fitting model: Max.cor.Y.cv.0.cp.0.rpart"
## [1] "    indep_vars: biddable, startprice.diff"
## Fitting cp = 0 on full training set

## Call:
## rpart(formula = .outcome ~ ., control = list(minsplit = 20, minbucket = 7, 
##     cp = 0, maxcompete = 4, maxsurrogate = 5, usesurrogate = 2, 
##     surrogatestyle = 0, maxdepth = 30, xval = 0))
##   n= 969 
## 
##             CP nsplit rel error
## 1 0.5279642058      0 1.0000000
## 2 0.1342281879      1 0.4720358
## 3 0.0041946309      2 0.3378076
## 4 0.0033557047     11 0.2997763
## 5 0.0022371365     13 0.2930649
## 6 0.0011185682     15 0.2885906
## 7 0.0005592841     21 0.2796421
## 8 0.0000000000     25 0.2774049
## 
## Variable importance
##        biddable startprice.diff 
##              52              48 
## 
## Node number 1: 969 observations,    complexity param=0.5279642
##   predicted class=N  expected loss=0.4613003  P(node) =1
##     class counts:   522   447
##    probabilities: 0.539 0.461 
##   left son=2 (539 obs) right son=3 (430 obs)
##   Primary splits:
##       biddable        < 0.5       to the left,  improve=151.58290, (0 missing)
##       startprice.diff < 62.89456  to the right, improve= 82.96307, (0 missing)
##   Surrogate splits:
##       startprice.diff < 250.1071  to the left,  agree=0.562, adj=0.014, (0 split)
## 
## Node number 2: 539 observations,    complexity param=0.004194631
##   predicted class=N  expected loss=0.2115028  P(node) =0.5562436
##     class counts:   425   114
##    probabilities: 0.788 0.212 
##   left son=4 (167 obs) right son=5 (372 obs)
##   Primary splits:
##       startprice.diff < 40.80806  to the right, improve=13.91796, (0 missing)
## 
## Node number 3: 430 observations,    complexity param=0.1342282
##   predicted class=Y  expected loss=0.2255814  P(node) =0.4437564
##     class counts:    97   333
##    probabilities: 0.226 0.774 
##   left son=6 (80 obs) right son=7 (350 obs)
##   Primary splits:
##       startprice.diff < 63.51092  to the right, improve=82.90292, (0 missing)
## 
## Node number 4: 167 observations
##   predicted class=N  expected loss=0.04191617  P(node) =0.1723426
##     class counts:   160     7
##    probabilities: 0.958 0.042 
## 
## Node number 5: 372 observations,    complexity param=0.004194631
##   predicted class=N  expected loss=0.2876344  P(node) =0.3839009
##     class counts:   265   107
##    probabilities: 0.712 0.288 
##   left son=10 (128 obs) right son=11 (244 obs)
##   Primary splits:
##       startprice.diff < -35.3304  to the left,  improve=13.51309, (0 missing)
## 
## Node number 6: 80 observations
##   predicted class=N  expected loss=0.125  P(node) =0.08255934
##     class counts:    70    10
##    probabilities: 0.875 0.125 
## 
## Node number 7: 350 observations,    complexity param=0.003355705
##   predicted class=Y  expected loss=0.07714286  P(node) =0.3611971
##     class counts:    27   323
##    probabilities: 0.077 0.923 
##   left son=14 (24 obs) right son=15 (326 obs)
##   Primary splits:
##       startprice.diff < 44.72834  to the right, improve=3.382343, (0 missing)
## 
## Node number 10: 128 observations
##   predicted class=N  expected loss=0.1015625  P(node) =0.1320949
##     class counts:   115    13
##    probabilities: 0.898 0.102 
## 
## Node number 11: 244 observations,    complexity param=0.004194631
##   predicted class=N  expected loss=0.3852459  P(node) =0.251806
##     class counts:   150    94
##    probabilities: 0.615 0.385 
##   left son=22 (65 obs) right son=23 (179 obs)
##   Primary splits:
##       startprice.diff < 20.77843  to the right, improve=2.079314, (0 missing)
## 
## Node number 14: 24 observations,    complexity param=0.003355705
##   predicted class=Y  expected loss=0.3333333  P(node) =0.0247678
##     class counts:     8    16
##    probabilities: 0.333 0.667 
##   left son=28 (7 obs) right son=29 (17 obs)
##   Primary splits:
##       startprice.diff < 47.96221  to the left,  improve=2.868347, (0 missing)
## 
## Node number 15: 326 observations
##   predicted class=Y  expected loss=0.05828221  P(node) =0.3364293
##     class counts:    19   307
##    probabilities: 0.058 0.942 
## 
## Node number 22: 65 observations,    complexity param=0.002237136
##   predicted class=N  expected loss=0.2769231  P(node) =0.06707946
##     class counts:    47    18
##    probabilities: 0.723 0.277 
##   left son=44 (58 obs) right son=45 (7 obs)
##   Primary splits:
##       startprice.diff < 37.95043  to the left,  improve=1.360818, (0 missing)
## 
## Node number 23: 179 observations,    complexity param=0.004194631
##   predicted class=N  expected loss=0.424581  P(node) =0.1847265
##     class counts:   103    76
##    probabilities: 0.575 0.425 
##   left son=46 (168 obs) right son=47 (11 obs)
##   Primary splits:
##       startprice.diff < -31.00081 to the right, improve=1.051349, (0 missing)
## 
## Node number 28: 7 observations
##   predicted class=N  expected loss=0.2857143  P(node) =0.007223942
##     class counts:     5     2
##    probabilities: 0.714 0.286 
## 
## Node number 29: 17 observations
##   predicted class=Y  expected loss=0.1764706  P(node) =0.01754386
##     class counts:     3    14
##    probabilities: 0.176 0.824 
## 
## Node number 44: 58 observations,    complexity param=0.0005592841
##   predicted class=N  expected loss=0.2413793  P(node) =0.05985552
##     class counts:    44    14
##    probabilities: 0.759 0.241 
##   left son=88 (9 obs) right son=89 (49 obs)
##   Primary splits:
##       startprice.diff < 36.35647  to the right, improve=1.241379, (0 missing)
## 
## Node number 45: 7 observations
##   predicted class=Y  expected loss=0.4285714  P(node) =0.007223942
##     class counts:     3     4
##    probabilities: 0.429 0.571 
## 
## Node number 46: 168 observations,    complexity param=0.004194631
##   predicted class=N  expected loss=0.4107143  P(node) =0.1733746
##     class counts:    99    69
##    probabilities: 0.589 0.411 
##   left son=92 (15 obs) right son=93 (153 obs)
##   Primary splits:
##       startprice.diff < -21.8033  to the left,  improve=2.5345, (0 missing)
## 
## Node number 47: 11 observations
##   predicted class=Y  expected loss=0.3636364  P(node) =0.01135191
##     class counts:     4     7
##    probabilities: 0.364 0.636 
## 
## Node number 88: 9 observations
##   predicted class=N  expected loss=0  P(node) =0.009287926
##     class counts:     9     0
##    probabilities: 1.000 0.000 
## 
## Node number 89: 49 observations,    complexity param=0.0005592841
##   predicted class=N  expected loss=0.2857143  P(node) =0.0505676
##     class counts:    35    14
##    probabilities: 0.714 0.286 
##   left son=178 (8 obs) right son=179 (41 obs)
##   Primary splits:
##       startprice.diff < 24.4582   to the left,  improve=0.4939024, (0 missing)
## 
## Node number 92: 15 observations
##   predicted class=N  expected loss=0.1333333  P(node) =0.01547988
##     class counts:    13     2
##    probabilities: 0.867 0.133 
## 
## Node number 93: 153 observations,    complexity param=0.004194631
##   predicted class=N  expected loss=0.4379085  P(node) =0.1578947
##     class counts:    86    67
##    probabilities: 0.562 0.438 
##   left son=186 (129 obs) right son=187 (24 obs)
##   Primary splits:
##       startprice.diff < -15.07226 to the right, improve=1.203982, (0 missing)
## 
## Node number 178: 8 observations
##   predicted class=N  expected loss=0.125  P(node) =0.008255934
##     class counts:     7     1
##    probabilities: 0.875 0.125 
## 
## Node number 179: 41 observations,    complexity param=0.0005592841
##   predicted class=N  expected loss=0.3170732  P(node) =0.04231166
##     class counts:    28    13
##    probabilities: 0.683 0.317 
##   left son=358 (18 obs) right son=359 (23 obs)
##   Primary splits:
##       startprice.diff < 30.11884  to the right, improve=0.5773536, (0 missing)
## 
## Node number 186: 129 observations,    complexity param=0.004194631
##   predicted class=N  expected loss=0.4108527  P(node) =0.1331269
##     class counts:    76    53
##    probabilities: 0.589 0.411 
##   left son=372 (27 obs) right son=373 (102 obs)
##   Primary splits:
##       startprice.diff < -8.72155  to the left,  improve=1.569438, (0 missing)
## 
## Node number 187: 24 observations,    complexity param=0.002237136
##   predicted class=Y  expected loss=0.4166667  P(node) =0.0247678
##     class counts:    10    14
##    probabilities: 0.417 0.583 
##   left son=374 (15 obs) right son=375 (9 obs)
##   Primary splits:
##       startprice.diff < -16.81039 to the left,  improve=1.088889, (0 missing)
## 
## Node number 358: 18 observations
##   predicted class=N  expected loss=0.2222222  P(node) =0.01857585
##     class counts:    14     4
##    probabilities: 0.778 0.222 
## 
## Node number 359: 23 observations,    complexity param=0.0005592841
##   predicted class=N  expected loss=0.3913043  P(node) =0.02373581
##     class counts:    14     9
##    probabilities: 0.609 0.391 
##   left son=718 (16 obs) right son=719 (7 obs)
##   Primary splits:
##       startprice.diff < 28.47301  to the left,  improve=0.6529503, (0 missing)
## 
## Node number 372: 27 observations
##   predicted class=N  expected loss=0.2592593  P(node) =0.02786378
##     class counts:    20     7
##    probabilities: 0.741 0.259 
## 
## Node number 373: 102 observations,    complexity param=0.004194631
##   predicted class=N  expected loss=0.4509804  P(node) =0.1052632
##     class counts:    56    46
##    probabilities: 0.549 0.451 
##   left son=746 (88 obs) right son=747 (14 obs)
##   Primary splits:
##       startprice.diff < -2.461955 to the right, improve=3.636427, (0 missing)
## 
## Node number 374: 15 observations
##   predicted class=N  expected loss=0.4666667  P(node) =0.01547988
##     class counts:     8     7
##    probabilities: 0.533 0.467 
## 
## Node number 375: 9 observations
##   predicted class=Y  expected loss=0.2222222  P(node) =0.009287926
##     class counts:     2     7
##    probabilities: 0.222 0.778 
## 
## Node number 718: 16 observations
##   predicted class=N  expected loss=0.3125  P(node) =0.01651187
##     class counts:    11     5
##    probabilities: 0.688 0.312 
## 
## Node number 719: 7 observations
##   predicted class=Y  expected loss=0.4285714  P(node) =0.007223942
##     class counts:     3     4
##    probabilities: 0.429 0.571 
## 
## Node number 746: 88 observations,    complexity param=0.004194631
##   predicted class=N  expected loss=0.3977273  P(node) =0.09081527
##     class counts:    53    35
##    probabilities: 0.602 0.398 
##   left son=1492 (78 obs) right son=1493 (10 obs)
##   Primary splits:
##       startprice.diff < 19.13936  to the left,  improve=0.9231935, (0 missing)
## 
## Node number 747: 14 observations
##   predicted class=Y  expected loss=0.2142857  P(node) =0.01444788
##     class counts:     3    11
##    probabilities: 0.214 0.786 
## 
## Node number 1492: 78 observations,    complexity param=0.001118568
##   predicted class=N  expected loss=0.3717949  P(node) =0.08049536
##     class counts:    49    29
##    probabilities: 0.628 0.372 
##   left son=2984 (9 obs) right son=2985 (69 obs)
##   Primary splits:
##       startprice.diff < 16.92743  to the right, improve=1.382757, (0 missing)
## 
## Node number 1493: 10 observations
##   predicted class=Y  expected loss=0.4  P(node) =0.01031992
##     class counts:     4     6
##    probabilities: 0.400 0.600 
## 
## Node number 2984: 9 observations
##   predicted class=N  expected loss=0.1111111  P(node) =0.009287926
##     class counts:     8     1
##    probabilities: 0.889 0.111 
## 
## Node number 2985: 69 observations,    complexity param=0.001118568
##   predicted class=N  expected loss=0.4057971  P(node) =0.07120743
##     class counts:    41    28
##    probabilities: 0.594 0.406 
##   left son=5970 (54 obs) right son=5971 (15 obs)
##   Primary splits:
##       startprice.diff < 12.81441  to the left,  improve=0.6235105, (0 missing)
## 
## Node number 5970: 54 observations,    complexity param=0.001118568
##   predicted class=N  expected loss=0.3703704  P(node) =0.05572755
##     class counts:    34    20
##    probabilities: 0.630 0.370 
##   left son=11940 (11 obs) right son=11941 (43 obs)
##   Primary splits:
##       startprice.diff < 9.7053    to the right, improve=0.9822254, (0 missing)
## 
## Node number 5971: 15 observations
##   predicted class=Y  expected loss=0.4666667  P(node) =0.01547988
##     class counts:     7     8
##    probabilities: 0.467 0.533 
## 
## Node number 11940: 11 observations
##   predicted class=N  expected loss=0.1818182  P(node) =0.01135191
##     class counts:     9     2
##    probabilities: 0.818 0.182 
## 
## Node number 11941: 43 observations,    complexity param=0.001118568
##   predicted class=N  expected loss=0.4186047  P(node) =0.04437564
##     class counts:    25    18
##    probabilities: 0.581 0.419 
##   left son=23882 (36 obs) right son=23883 (7 obs)
##   Primary splits:
##       startprice.diff < 7.505582  to the left,  improve=0.39055, (0 missing)
## 
## Node number 23882: 36 observations,    complexity param=0.001118568
##   predicted class=N  expected loss=0.3888889  P(node) =0.0371517
##     class counts:    22    14
##    probabilities: 0.611 0.389 
##   left son=47764 (7 obs) right son=47765 (29 obs)
##   Primary splits:
##       startprice.diff < 4.593603  to the right, improve=1.051998, (0 missing)
## 
## Node number 23883: 7 observations
##   predicted class=Y  expected loss=0.4285714  P(node) =0.007223942
##     class counts:     3     4
##    probabilities: 0.429 0.571 
## 
## Node number 47764: 7 observations
##   predicted class=N  expected loss=0.1428571  P(node) =0.007223942
##     class counts:     6     1
##    probabilities: 0.857 0.143 
## 
## Node number 47765: 29 observations,    complexity param=0.001118568
##   predicted class=N  expected loss=0.4482759  P(node) =0.02992776
##     class counts:    16    13
##    probabilities: 0.552 0.448 
##   left son=95530 (19 obs) right son=95531 (10 obs)
##   Primary splits:
##       startprice.diff < 1.154237  to the left,  improve=0.7027223, (0 missing)
## 
## Node number 95530: 19 observations
##   predicted class=N  expected loss=0.3684211  P(node) =0.01960784
##     class counts:    12     7
##    probabilities: 0.632 0.368 
## 
## Node number 95531: 10 observations
##   predicted class=Y  expected loss=0.4  P(node) =0.01031992
##     class counts:     4     6
##    probabilities: 0.400 0.600 
## 
## n= 969 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
##     1) root 969 447 N (0.53869969 0.46130031)  
##       2) biddable< 0.5 539 114 N (0.78849722 0.21150278)  
##         4) startprice.diff>=40.80806 167   7 N (0.95808383 0.04191617) *
##         5) startprice.diff< 40.80806 372 107 N (0.71236559 0.28763441)  
##          10) startprice.diff< -35.3304 128  13 N (0.89843750 0.10156250) *
##          11) startprice.diff>=-35.3304 244  94 N (0.61475410 0.38524590)  
##            22) startprice.diff>=20.77843 65  18 N (0.72307692 0.27692308)  
##              44) startprice.diff< 37.95043 58  14 N (0.75862069 0.24137931)  
##                88) startprice.diff>=36.35647 9   0 N (1.00000000 0.00000000) *
##                89) startprice.diff< 36.35647 49  14 N (0.71428571 0.28571429)  
##                 178) startprice.diff< 24.4582 8   1 N (0.87500000 0.12500000) *
##                 179) startprice.diff>=24.4582 41  13 N (0.68292683 0.31707317)  
##                   358) startprice.diff>=30.11884 18   4 N (0.77777778 0.22222222) *
##                   359) startprice.diff< 30.11884 23   9 N (0.60869565 0.39130435)  
##                     718) startprice.diff< 28.47301 16   5 N (0.68750000 0.31250000) *
##                     719) startprice.diff>=28.47301 7   3 Y (0.42857143 0.57142857) *
##              45) startprice.diff>=37.95043 7   3 Y (0.42857143 0.57142857) *
##            23) startprice.diff< 20.77843 179  76 N (0.57541899 0.42458101)  
##              46) startprice.diff>=-31.00081 168  69 N (0.58928571 0.41071429)  
##                92) startprice.diff< -21.8033 15   2 N (0.86666667 0.13333333) *
##                93) startprice.diff>=-21.8033 153  67 N (0.56209150 0.43790850)  
##                 186) startprice.diff>=-15.07226 129  53 N (0.58914729 0.41085271)  
##                   372) startprice.diff< -8.72155 27   7 N (0.74074074 0.25925926) *
##                   373) startprice.diff>=-8.72155 102  46 N (0.54901961 0.45098039)  
##                     746) startprice.diff>=-2.461955 88  35 N (0.60227273 0.39772727)  
##                      1492) startprice.diff< 19.13936 78  29 N (0.62820513 0.37179487)  
##                        2984) startprice.diff>=16.92743 9   1 N (0.88888889 0.11111111) *
##                        2985) startprice.diff< 16.92743 69  28 N (0.59420290 0.40579710)  
##                          5970) startprice.diff< 12.81441 54  20 N (0.62962963 0.37037037)  
##                           11940) startprice.diff>=9.7053 11   2 N (0.81818182 0.18181818) *
##                           11941) startprice.diff< 9.7053 43  18 N (0.58139535 0.41860465)  
##                             23882) startprice.diff< 7.505582 36  14 N (0.61111111 0.38888889)  
##                               47764) startprice.diff>=4.593603 7   1 N (0.85714286 0.14285714) *
##                               47765) startprice.diff< 4.593603 29  13 N (0.55172414 0.44827586)  
##                                 95530) startprice.diff< 1.154237 19   7 N (0.63157895 0.36842105) *
##                                 95531) startprice.diff>=1.154237 10   4 Y (0.40000000 0.60000000) *
##                             23883) startprice.diff>=7.505582 7   3 Y (0.42857143 0.57142857) *
##                          5971) startprice.diff>=12.81441 15   7 Y (0.46666667 0.53333333) *
##                      1493) startprice.diff>=19.13936 10   4 Y (0.40000000 0.60000000) *
##                     747) startprice.diff< -2.461955 14   3 Y (0.21428571 0.78571429) *
##                 187) startprice.diff< -15.07226 24  10 Y (0.41666667 0.58333333)  
##                   374) startprice.diff< -16.81039 15   7 N (0.53333333 0.46666667) *
##                   375) startprice.diff>=-16.81039 9   2 Y (0.22222222 0.77777778) *
##              47) startprice.diff< -31.00081 11   4 Y (0.36363636 0.63636364) *
##       3) biddable>=0.5 430  97 Y (0.22558140 0.77441860)  
##         6) startprice.diff>=63.51092 80  10 N (0.87500000 0.12500000) *
##         7) startprice.diff< 63.51092 350  27 Y (0.07714286 0.92285714)  
##          14) startprice.diff>=44.72834 24   8 Y (0.33333333 0.66666667)  
##            28) startprice.diff< 47.96221 7   2 N (0.71428571 0.28571429) *
##            29) startprice.diff>=47.96221 17   3 Y (0.17647059 0.82352941) *
##          15) startprice.diff< 44.72834 326  19 Y (0.05828221 0.94171779) *
## [1] "    calling mypredict_mdl for fit:"

##    threshold   f.score
## 1        0.0 0.6313559
## 2        0.1 0.7096774
## 3        0.2 0.8350305
## 4        0.3 0.8537634
## 5        0.4 0.8603352
## 6        0.5 0.8590909
## 7        0.6 0.8398058
## 8        0.7 0.8339483
## 9        0.8 0.8126582
## 10       0.9 0.7943079
## 11       1.0 0.0000000

## [1] "Classifier Probability Threshold: 0.4000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.N
## 1         N                                           459
## 2         Y                                            62
##   sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.Y
## 1                                            63
## 2                                           385
##          Prediction
## Reference   N   Y
##         N 459  63
##         Y  62 385
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.710010e-01   7.404889e-01   8.482486e-01   8.914697e-01   5.386997e-01 
## AccuracyPValue  McnemarPValue 
##  5.246976e-109   1.000000e+00 
## [1] "    calling mypredict_mdl for OOB:"

##    threshold   f.score
## 1        0.0 0.6339217
## 2        0.1 0.7082601
## 3        0.2 0.7955556
## 4        0.3 0.8130841
## 5        0.4 0.8105516
## 6        0.5 0.8117359
## 7        0.6 0.8098318
## 8        0.7 0.8130719
## 9        0.8 0.8119891
## 10       0.9 0.8099861
## 11       1.0 0.0000000

## [1] "Classifier Probability Threshold: 0.3000 to maximize f.score.OOB"
##   sold.fctr sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.N
## 1         N                                           382
## 2         Y                                            65
##   sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.Y
## 1                                            95
## 2                                           348
##          Prediction
## Reference   N   Y
##         N 382  95
##         Y  65 348
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.202247e-01   6.403332e-01   7.933882e-01   8.449213e-01   5.359551e-01 
## AccuracyPValue  McnemarPValue 
##   4.501677e-71   2.186809e-02 
##                    model_id model_method                     feats
## 1 Max.cor.Y.cv.0.cp.0.rpart        rpart biddable, startprice.diff
##   max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1               0                      0.471                 0.008
##   max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1   0.9238966                    0.4       0.8603352         0.871001
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1             0.8482486             0.8914697     0.7404889   0.8997924
##   opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1                    0.3       0.8130841        0.8202247
##   max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1             0.7933882             0.8449213     0.6403332
# Max.cor.Y with method "rpart", fitted on the max_cor_y_x_vars features.
# Only run here for regression and binomial problems.
if (glb_is_regression || glb_is_binomial) {
    # For multinomials this model will be run next by default
    ret_lst <- myfit_mdl(
        model_id        = "Max.cor.Y",
        model_method    = "rpart",
        model_type      = glb_model_type,
        indep_vars_vctr = max_cor_y_x_vars,
        rsp_var         = glb_rsp_var,
        rsp_var_out     = glb_rsp_var_out,
        fit_df          = glb_fitobs_df,
        OOB_df          = glb_OOBobs_df,
        n_cv_folds      = glb_n_cv_folds,   # presumably the number of CV folds
        tune_models_df  = NULL
    )
}
## [1] "fitting model: Max.cor.Y.rpart"
## [1] "    indep_vars: biddable, startprice.diff"
## Aggregating results
## Selecting tuning parameters
## Fitting cp = 0.00419 on full training set
## Warning in myfit_mdl(model_id = "Max.cor.Y", model_method = "rpart",
## model_type = glb_model_type, : model's bestTune found at an extreme of
## tuneGrid for parameter: cp

## Call:
## rpart(formula = .outcome ~ ., control = list(minsplit = 20, minbucket = 7, 
##     cp = 0, maxcompete = 4, maxsurrogate = 5, usesurrogate = 2, 
##     surrogatestyle = 0, maxdepth = 30, xval = 0))
##   n= 969 
## 
##            CP nsplit rel error
## 1 0.527964206      0 1.0000000
## 2 0.134228188      1 0.4720358
## 3 0.004194631      2 0.3378076
## 
## Variable importance
##        biddable startprice.diff 
##              64              36 
## 
## Node number 1: 969 observations,    complexity param=0.5279642
##   predicted class=N  expected loss=0.4613003  P(node) =1
##     class counts:   522   447
##    probabilities: 0.539 0.461 
##   left son=2 (539 obs) right son=3 (430 obs)
##   Primary splits:
##       biddable        < 0.5      to the left,  improve=151.58290, (0 missing)
##       startprice.diff < 62.89456 to the right, improve= 82.96307, (0 missing)
##   Surrogate splits:
##       startprice.diff < 250.1071 to the left,  agree=0.562, adj=0.014, (0 split)
## 
## Node number 2: 539 observations
##   predicted class=N  expected loss=0.2115028  P(node) =0.5562436
##     class counts:   425   114
##    probabilities: 0.788 0.212 
## 
## Node number 3: 430 observations,    complexity param=0.1342282
##   predicted class=Y  expected loss=0.2255814  P(node) =0.4437564
##     class counts:    97   333
##    probabilities: 0.226 0.774 
##   left son=6 (80 obs) right son=7 (350 obs)
##   Primary splits:
##       startprice.diff < 63.51092 to the right, improve=82.90292, (0 missing)
## 
## Node number 6: 80 observations
##   predicted class=N  expected loss=0.125  P(node) =0.08255934
##     class counts:    70    10
##    probabilities: 0.875 0.125 
## 
## Node number 7: 350 observations
##   predicted class=Y  expected loss=0.07714286  P(node) =0.3611971
##     class counts:    27   323
##    probabilities: 0.077 0.923 
## 
## n= 969 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 969 447 N (0.53869969 0.46130031)  
##   2) biddable< 0.5 539 114 N (0.78849722 0.21150278) *
##   3) biddable>=0.5 430  97 Y (0.22558140 0.77441860)  
##     6) startprice.diff>=63.51092 80  10 N (0.87500000 0.12500000) *
##     7) startprice.diff< 63.51092 350  27 Y (0.07714286 0.92285714) *
## [1] "    calling mypredict_mdl for fit:"

##    threshold   f.score
## 1        0.0 0.6313559
## 2        0.1 0.6313559
## 3        0.2 0.6541916
## 4        0.3 0.8105395
## 5        0.4 0.8105395
## 6        0.5 0.8105395
## 7        0.6 0.8105395
## 8        0.7 0.8105395
## 9        0.8 0.8105395
## 10       0.9 0.8105395
## 11       1.0 0.0000000

## [1] "Classifier Probability Threshold: 0.9000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.Max.cor.Y.rpart.N
## 1         N                                 495
## 2         Y                                 124
##   sold.fctr.predict.Max.cor.Y.rpart.Y
## 1                                  27
## 2                                 323
##          Prediction
## Reference   N   Y
##         N 495  27
##         Y 124 323
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.441692e-01   6.814949e-01   8.197763e-01   8.664485e-01   5.386997e-01 
## AccuracyPValue  McnemarPValue 
##   1.762753e-90   5.612287e-15 
## [1] "    calling mypredict_mdl for OOB:"

##    threshold   f.score
## 1        0.0 0.6339217
## 2        0.1 0.6339217
## 3        0.2 0.6633907
## 4        0.3 0.8102981
## 5        0.4 0.8102981
## 6        0.5 0.8102981
## 7        0.6 0.8102981
## 8        0.7 0.8102981
## 9        0.8 0.8102981
## 10       0.9 0.8102981
## 11       1.0 0.0000000

## [1] "Classifier Probability Threshold: 0.9000 to maximize f.score.OOB"
##   sold.fctr sold.fctr.predict.Max.cor.Y.rpart.N
## 1         N                                 451
## 2         Y                                 114
##   sold.fctr.predict.Max.cor.Y.rpart.Y
## 1                                  26
## 2                                 299
##          Prediction
## Reference   N   Y
##         N 451  26
##         Y 114 299
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.426966e-01   6.791719e-01   8.170871e-01   8.660125e-01   5.359551e-01 
## AccuracyPValue  McnemarPValue 
##   1.090657e-83   1.940362e-13 
##          model_id model_method                     feats max.nTuningRuns
## 1 Max.cor.Y.rpart        rpart biddable, startprice.diff               3
##   min.elapsedtime.everything min.elapsedtime.final max.auc.fit
## 1                      0.959                 0.012   0.8434283
##   opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1                    0.9       0.8105395        0.8276574
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1             0.8197763             0.8664485     0.6497643   0.8469855
##   opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1                    0.9       0.8102981        0.8426966
##   max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1             0.8170871             0.8660125     0.6791719
##   max.AccuracySD.fit max.KappaSD.fit
## 1         0.01087271      0.02515063
# Max.cor.Y fitted with the method chosen by problem type:
# "lm" for regression, "glm" for binomial, "rpart" otherwise.
# Used to compare vs. Interactions.High.cor.Y and/or Max.cor.Y.TmSrs
ret_lst <- myfit_mdl(
    model_id        = "Max.cor.Y",
    model_method    = ifelse(glb_is_regression,
                             "lm",
                             ifelse(glb_is_binomial, "glm", "rpart")),
    model_type      = glb_model_type,
    indep_vars_vctr = max_cor_y_x_vars,
    rsp_var         = glb_rsp_var,
    rsp_var_out     = glb_rsp_var_out,
    fit_df          = glb_fitobs_df,
    OOB_df          = glb_OOBobs_df,
    n_cv_folds      = glb_n_cv_folds,
    tune_models_df  = NULL
)
## [1] "fitting model: Max.cor.Y.glm"
## [1] "    indep_vars: biddable, startprice.diff"
## Aggregating results
## Fitting final model on full training set

## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3288  -0.7074  -0.2424   0.5858   2.7650  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -1.318196   0.113712 -11.592   <2e-16 ***
## biddable         3.077594   0.185065  16.630   <2e-16 ***
## startprice.diff -0.011328   0.001175  -9.639   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1337.51  on 968  degrees of freedom
## Residual deviance:  877.46  on 966  degrees of freedom
## AIC: 883.46
## 
## Number of Fisher Scoring iterations: 5
## 
## [1] "    calling mypredict_mdl for fit:"

##    threshold    f.score
## 1        0.0 0.63135593
## 2        0.1 0.68725869
## 3        0.2 0.71942446
## 4        0.3 0.71295337
## 5        0.4 0.74831461
## 6        0.5 0.77083333
## 7        0.6 0.79616307
## 8        0.7 0.80593325
## 9        0.8 0.77165354
## 10       0.9 0.08102345
## 11       1.0 0.00000000

## [1] "Classifier Probability Threshold: 0.7000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.Max.cor.Y.glm.N
## 1         N                               486
## 2         Y                               121
##   sold.fctr.predict.Max.cor.Y.glm.Y
## 1                                36
## 2                               326
##          Prediction
## Reference   N   Y
##         N 486  36
##         Y 121 326
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.379773e-01   6.694866e-01   8.132413e-01   8.606386e-01   5.386997e-01 
## AccuracyPValue  McnemarPValue 
##   1.523362e-86   2.028877e-11 
## [1] "    calling mypredict_mdl for OOB:"

##    threshold    f.score
## 1        0.0 0.63392172
## 2        0.1 0.68661679
## 3        0.2 0.72303207
## 4        0.3 0.72108844
## 5        0.4 0.74673008
## 6        0.5 0.77057357
## 7        0.6 0.79021879
## 8        0.7 0.80478088
## 9        0.8 0.79096045
## 10       0.9 0.06960557
## 11       1.0 0.00000000

## [1] "Classifier Probability Threshold: 0.7000 to maximize f.score.OOB"
##   sold.fctr sold.fctr.predict.Max.cor.Y.glm.N
## 1         N                               440
## 2         Y                               110
##   sold.fctr.predict.Max.cor.Y.glm.Y
## 1                                37
## 2                               303
##          Prediction
## Reference   N   Y
##         N 440  37
##         Y 110 303
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.348315e-01   6.639612e-01   8.087745e-01   8.586487e-01   5.359551e-01 
## AccuracyPValue  McnemarPValue 
##   4.091305e-79   2.877120e-09 
##        model_id model_method                     feats max.nTuningRuns
## 1 Max.cor.Y.glm          glm biddable, startprice.diff               1
##   min.elapsedtime.everything min.elapsedtime.final max.auc.fit
## 1                      0.955                 0.013   0.8591461
##   opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1                    0.7       0.8059333        0.7987616
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1             0.8132413             0.8606386     0.5929577   0.8659702
##   opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1                    0.7       0.8047809        0.8348315
##   max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB min.aic.fit
## 1             0.8087745             0.8586487     0.6639612    883.4623
##   max.AccuracySD.fit max.KappaSD.fit
## 1         0.03879246      0.07822035
# Max.cor.Y.TmSrs.poly: the Max.cor.Y features plus every column of
# glb_allobs_df whose name matches "<date var>.day.minutes.poly." for any
# variable in glb_date_vars. Skipped when glb_date_vars is NULL or no such
# column exists.
#
# The per-variable patterns are collapsed into a single alternation because
# grep()/grepl() use only the first element of a vector pattern (with a
# warning); without the collapse, all but the first date variable would be
# ignored. With a single date variable the pattern is unchanged.
tmsrs_poly_feats <- if (is.null(glb_date_vars)) character(0) else
    grep(paste(paste0(glb_date_vars, "\\.day\\.minutes\\.poly\\."),
               collapse="|"),
         names(glb_allobs_df), value=TRUE)

if (length(tmsrs_poly_feats) > 0) {
# ret_lst <- myfit_mdl(model_id="Max.cor.Y.TmSrs.poly1", 
#                         model_method=ifelse(glb_is_regression, "lm", 
#                                         ifelse(glb_is_binomial, "glm", "rpart")),
#                      model_type=glb_model_type,
#                         indep_vars_vctr=c(max_cor_y_x_vars, paste0(glb_date_vars, ".day.minutes")),
#                         rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out,
#                         fit_df=glb_fitobs_df, OOB_df=glb_OOBobs_df,
#                         n_cv_folds=glb_n_cv_folds, tune_models_df=NULL)
# 
    ret_lst <- myfit_mdl(model_id="Max.cor.Y.TmSrs.poly", 
                         model_method=ifelse(glb_is_regression, "lm", 
                                         ifelse(glb_is_binomial, "glm", "rpart")),
                         model_type=glb_model_type,
                         # matched polynomial time features, computed once above
                         indep_vars_vctr=c(max_cor_y_x_vars, tmsrs_poly_feats),
                         rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out,
                         fit_df=glb_fitobs_df, OOB_df=glb_OOBobs_df,
                         n_cv_folds=glb_n_cv_folds, tune_models_df=NULL)
}

# Interact.High.cor.Y: the Max.cor.Y features combined with the features
# listed (non-NA) in glb_feats_df$cor.high.X. Skipped when there are none.
# Note: int_feats and indep_vars_vctr are assigned at top level here.
if (length(int_feats <- setdiff(unique(glb_feats_df$cor.high.X), NA)) > 0) {
    # lm & glm handle interaction terms; rpart & rf do not
    if (glb_is_regression || glb_is_binomial) {
        # Interaction terms pair each int_feats entry with the FIRST
        # Max.cor.Y feature only, written as "<first feature>:<feature>".
        indep_vars_vctr <- 
            c(max_cor_y_x_vars, paste(max_cor_y_x_vars[1], int_feats, sep=":"))
    } else {
        # No interaction terms: just add the features themselves.
        indep_vars_vctr <- union(max_cor_y_x_vars, int_feats)
    }

    # All arguments are passed by name, consistent with the other myfit_mdl
    # calls in this file, so the call does not depend on formal-argument order.
    ret_lst <- myfit_mdl(model_id="Interact.High.cor.Y", 
                         model_method=ifelse(glb_is_regression, "lm", 
                                         ifelse(glb_is_binomial, "glm", "rpart")),
                         model_type=glb_model_type,
                         indep_vars_vctr=indep_vars_vctr,
                         rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out,
                         fit_df=glb_fitobs_df, OOB_df=glb_OOBobs_df,
                         n_cv_folds=glb_n_cv_folds, tune_models_df=NULL)
}
## [1] "fitting model: Interact.High.cor.Y.glm"
## [1] "    indep_vars: biddable, startprice.diff, biddable:D.terms.n.post.stop, biddable:D.TfIdf.sum.post.stem, biddable:D.ratio.nstopwrds.nwrds, biddable:D.npnct06.log, biddable:D.nchrs.log, biddable:D.terms.n.post.stop.log, biddable:cellular.fctr, biddable:D.nwrds.unq.log"
## Aggregating results
## Fitting final model on full training set

## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3460  -0.7080  -0.2415   0.5570   2.7490  
## 
## Coefficients:
##                                     Estimate Std. Error z value Pr(>|z|)
## (Intercept)                        -1.315185   0.113499 -11.588   <2e-16
## biddable                            3.431627   1.432235   2.396   0.0166
## startprice.diff                    -0.011135   0.001186  -9.389   <2e-16
## `biddable:D.terms.n.post.stop`     -0.472748   0.390367  -1.211   0.2259
## `biddable:D.TfIdf.sum.post.stem`    0.272868   0.214844   1.270   0.2041
## `biddable:D.ratio.nstopwrds.nwrds` -0.196093   1.423291  -0.138   0.8904
## `biddable:D.npnct06.log`           -0.443007   0.765464  -0.579   0.5628
## `biddable:D.nchrs.log`             -2.042901   1.323123  -1.544   0.1226
## `biddable:D.terms.n.post.stop.log` 13.051918  12.683531   1.029   0.3035
## `biddable:cellular.fctr1`           0.121261   0.324337   0.374   0.7085
## `biddable:cellular.fctrUnknown`    -0.935810   0.377576  -2.478   0.0132
## `biddable:D.nwrds.unq.log`         -8.093449  11.778183  -0.687   0.4920
##                                       
## (Intercept)                        ***
## biddable                           *  
## startprice.diff                    ***
## `biddable:D.terms.n.post.stop`        
## `biddable:D.TfIdf.sum.post.stem`      
## `biddable:D.ratio.nstopwrds.nwrds`    
## `biddable:D.npnct06.log`              
## `biddable:D.nchrs.log`                
## `biddable:D.terms.n.post.stop.log`    
## `biddable:cellular.fctr1`             
## `biddable:cellular.fctrUnknown`    *  
## `biddable:D.nwrds.unq.log`            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1337.51  on 968  degrees of freedom
## Residual deviance:  863.84  on 957  degrees of freedom
## AIC: 887.84
## 
## Number of Fisher Scoring iterations: 5
## 
## [1] "    calling mypredict_mdl for fit:"

##    threshold   f.score
## 1        0.0 0.6313559
## 2        0.1 0.6867284
## 3        0.2 0.7212230
## 4        0.3 0.7160752
## 5        0.4 0.7483146
## 6        0.5 0.7720930
## 7        0.6 0.7908102
## 8        0.7 0.7904642
## 9        0.8 0.7425474
## 10       0.9 0.2758621
## 11       1.0 0.0000000

## [1] "Classifier Probability Threshold: 0.6000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.Interact.High.cor.Y.glm.N
## 1         N                                         469
## 2         Y                                         120
##   sold.fctr.predict.Interact.High.cor.Y.glm.Y
## 1                                          53
## 2                                         327
##          Prediction
## Reference   N   Y
##         N 469  53
##         Y 120 327
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.214654e-01   6.368687e-01   7.958716e-01   8.450880e-01   5.386997e-01 
## AccuracyPValue  McnemarPValue 
##   1.291609e-76   5.224287e-07 
## [1] "    calling mypredict_mdl for OOB:"

##    threshold   f.score
## 1        0.0 0.6339217
## 2        0.1 0.6854772
## 3        0.2 0.7235694
## 4        0.3 0.7186441
## 5        0.4 0.7467301
## 6        0.5 0.7686567
## 7        0.6 0.7865459
## 8        0.7 0.7838926
## 9        0.8 0.7349927
## 10       0.9 0.2541667
## 11       1.0 0.0000000

## [1] "Classifier Probability Threshold: 0.6000 to maximize f.score.OOB"
##   sold.fctr sold.fctr.predict.Interact.High.cor.Y.glm.N
## 1         N                                         421
## 2         Y                                         109
##   sold.fctr.predict.Interact.High.cor.Y.glm.Y
## 1                                          56
## 2                                         304
##          Prediction
## Reference   N   Y
##         N 421  56
##         Y 109 304
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.146067e-01   6.240496e-01   7.874870e-01   8.396247e-01   5.359551e-01 
## AccuracyPValue  McnemarPValue 
##   3.944312e-68   5.161425e-05 
##                  model_id model_method
## 1 Interact.High.cor.Y.glm          glm
##                                                                                                                                                                                                                                                         feats
## 1 biddable, startprice.diff, biddable:D.terms.n.post.stop, biddable:D.TfIdf.sum.post.stem, biddable:D.ratio.nstopwrds.nwrds, biddable:D.npnct06.log, biddable:D.nchrs.log, biddable:D.terms.n.post.stop.log, biddable:cellular.fctr, biddable:D.nwrds.unq.log
##   max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1               1                      0.991                 0.015
##   max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1    0.861739                    0.6       0.7908102        0.7997936
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1             0.7958716              0.845088      0.594392   0.8576352
##   opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1                    0.6       0.7865459        0.8146067
##   max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB min.aic.fit
## 1              0.787487             0.8396247     0.6240496    887.8417
##   max.AccuracySD.fit max.KappaSD.fit
## 1         0.03719468      0.07559072
# Low.cor.X
# Candidate predictors for the Low.cor.X model: the ids of the rows of
# glb_feats_df that satisfy all of
#   - cor.high.X is NA   (presumably: no highly correlated peer feature was
#                         recorded for this feature -- TODO confirm where
#                         cor.high.X is populated),
#   - myNearZV is FALSE  (presumably a near-zero-variance flag -- confirm),
#   - exclude.as.feat is not 1.
#
# Disabled alternative for binomial classification, kept for reference; it
# filtered on is.ConditionalX.y where the active code filters on !myNearZV:
# if (glb_is_classification && glb_is_binomial)
#     indep_vars_vctr <- subset(glb_feats_df, is.na(cor.high.X) & 
#                                             is.ConditionalX.y & 
#                                             (exclude.as.feat != 1))[, "id"] else
indep_vars_vctr <- subset(glb_feats_df, is.na(cor.high.X) & !myNearZV & 
                              (exclude.as.feat != 1))[, "id"]  
# Replace every feature that has an interaction partner registered in
# glb_feats_df$interaction.feat with the term "<partner>:<feature>".
#
# Args:
#   vars_vctr: vector of feature ids (and/or formula terms) to adjust.
#
# Returns:
#   vars_vctr with each affected id removed and its interaction term added
#   at the end. Because union() is used, the result carries no duplicates
#   whenever at least one replacement happened; otherwise the input is
#   returned unchanged.
#
# Reads the global glb_feats_df (columns id and interaction.feat).
myadjust_interaction_feats <- function(vars_vctr) {
    interact_feats_df <- subset(glb_feats_df, !is.na(interaction.feat))
    for (feat in interact_feats_df$id) {
        # Only features actually present in the requested set are rewritten.
        if (!(feat %in% vars_vctr)) next

        partner <- glb_feats_df[glb_feats_df$id == feat, "interaction.feat"]
        interact_term <- paste0(partner, ":", feat)
        vars_vctr <- union(setdiff(vars_vctr, feat), interact_term)
    }
    vars_vctr
}
# Swap features that have a registered interaction partner for their
# "<partner>:<feature>" term before fitting.
indep_vars_vctr <- myadjust_interaction_feats(indep_vars_vctr)
# Fit the "Low.cor.X" model on the fit observations and evaluate it on the
# OOB observations. The method is "lm" for regression, otherwise "glm" when
# the response is binomial, otherwise "rpart". No tuning grid is supplied
# (tune_models_df=NULL).
# NOTE(review): glb_rsp_var and glb_rsp_var_out are passed positionally after
# named arguments, so they bind to whichever myfit_mdl parameters remain
# unmatched -- confirm against the myfit_mdl signature.
ret_lst <- myfit_mdl(model_id="Low.cor.X", 
                        model_method=ifelse(glb_is_regression, "lm", 
                                        ifelse(glb_is_binomial, "glm", "rpart")),
                        indep_vars_vctr=indep_vars_vctr,
                        model_type=glb_model_type,                     
                        glb_rsp_var, glb_rsp_var_out,
                        fit_df=glb_fitobs_df, OOB_df=glb_OOBobs_df,
                        n_cv_folds=glb_n_cv_folds, tune_models_df=NULL)
## [1] "fitting model: Low.cor.X.glm"
## [1] "    indep_vars: biddable, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, prdl.my.descr.fctr, color.fctr, D.npnct08.log, D.npnct06.log, D.npnct28.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr"
## Aggregating results
## Fitting final model on full training set
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: not plotting observations with leverage one:
##   354, 619

## Warning: not plotting observations with leverage one:
##   354, 619

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7920  -0.6214  -0.1062   0.4960   3.2701  
## 
## Coefficients: (53 not defined because of singularities)
##                                                      Estimate Std. Error
## (Intercept)                                        -7.334e+00  6.651e+00
## biddable                                            3.077e+00  2.274e-01
## D.npnct15.log                                       1.677e+00  9.240e-01
## D.npnct03.log                                       3.299e-01  1.535e+00
## D.terms.n.stem.stop.Ratio                           4.797e+00  6.085e+00
## D.ratio.sum.TfIdf.nwrds                            -4.877e-01  4.544e-01
## D.npnct01.log                                       2.717e-01  7.962e-01
## .rnorm                                              6.336e-03  9.519e-02
## D.TfIdf.sum.stem.stop.Ratio                         1.235e+00  4.224e+00
## storage.fctr16                                     -1.215e-01  5.683e-01
## storage.fctr32                                     -2.054e-01  5.949e-01
## storage.fctr64                                      3.864e-01  5.999e-01
## storage.fctrUnknown                                 2.769e-01  7.260e-01
## D.npnct11.log                                       2.648e-01  3.885e-01
## D.npnct10.log                                      -1.511e+00  1.940e+00
## `prdl.my.descr.fctrUnknown#1`                       7.109e-01  1.118e+00
## `prdl.my.descr.fctriPad 1#0`                        7.377e-01  5.739e-01
## `prdl.my.descr.fctriPad 1#1`                        2.353e+00  1.186e+00
## `prdl.my.descr.fctriPad 2#0`                        1.744e+00  6.851e-01
## `prdl.my.descr.fctriPad 2#1`                        1.857e+00  1.172e+00
## `prdl.my.descr.fctriPad 3+#0`                       1.173e+00  5.493e-01
## `prdl.my.descr.fctriPad 3+#1`                       8.844e-01  1.185e+00
## `prdl.my.descr.fctriPadAir#0`                       3.489e-01  5.258e-01
## `prdl.my.descr.fctriPadAir#1`                       1.775e+00  1.134e+00
## `prdl.my.descr.fctriPadmini 2+#0`                   2.045e-01  5.371e-01
## `prdl.my.descr.fctriPadmini 2+#1`                   1.785e+00  1.630e+00
## `prdl.my.descr.fctriPadmini#0`                      5.642e-01  5.201e-01
## `prdl.my.descr.fctriPadmini#1`                      1.643e+00  1.328e+00
## color.fctrGold                                      3.371e-02  5.518e-01
## `color.fctrSpace Gray`                             -1.404e-01  3.796e-01
## color.fctrUnknown                                  -2.897e-01  2.730e-01
## color.fctrWhite                                    -2.466e-01  3.007e-01
## D.npnct08.log                                       6.281e-01  7.927e-01
## D.npnct06.log                                      -1.974e+00  9.406e-01
## D.npnct28.log                                      -2.853e+00  3.334e+03
## D.npnct12.log                                      -9.026e-01  8.708e-01
## D.npnct09.log                                      -9.265e+00  2.854e+03
## D.ndgts.log                                         6.392e-01  4.211e-01
## cellular.fctr1                                     -3.674e-02  2.314e-01
## cellular.fctrUnknown                               -6.176e-01  4.605e-01
## D.npnct14.log                                      -1.013e+00  9.205e-01
## D.terms.n.post.stop                                -1.688e-01  8.496e-02
## D.npnct05.log                                      -3.690e+00  1.766e+00
## `condition.fctrFor parts or not working`            6.088e-01  3.918e-01
## `condition.fctrManufacturer refurbished`            8.394e-01  5.850e-01
## condition.fctrNew                                  -2.849e-01  3.099e-01
## `condition.fctrNew other (see details)`             8.380e-01  5.025e-01
## `condition.fctrSeller refurbished`                 -6.690e-01  4.741e-01
## idseq.my                                           -1.261e-04  2.150e-04
## startprice.diff                                    -1.354e-02  1.547e-03
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2`             NA         NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2`      1.645e+00  1.004e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2`              NA         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2`       6.192e-02  1.058e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2`              NA         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2`      -8.743e-02  9.407e-01
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2`             NA         NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2`      1.082e+00  8.614e-01
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2`             NA         NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2`      7.218e-02  7.851e-01
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2`         NA         NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2`  6.090e-01  1.917e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2`            NA         NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2`     6.295e-02  1.408e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3`             NA         NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3`      1.142e+00  2.018e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3`              NA         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3`       7.483e-02  1.232e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3`              NA         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3`      -5.813e-01  1.279e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3`             NA         NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3`      4.610e-01  1.360e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3`             NA         NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3`     -6.393e-01  1.223e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3`         NA         NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3`  7.449e-01  2.220e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3`            NA         NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3`     2.777e-01  1.264e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4`             NA         NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4`      1.803e+01  2.669e+03
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4`              NA         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4`       7.096e-02  1.155e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4`              NA         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4`       6.105e-01  1.113e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4`             NA         NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4`     -1.451e+01  8.458e+02
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4`             NA         NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4`     -6.011e-01  1.130e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4`         NA         NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4` -1.433e+01  3.956e+03
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4`            NA         NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4`     1.722e+01  1.542e+03
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5`             NA         NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5`             NA         NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5`              NA         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5`       1.681e+01  1.401e+03
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5`              NA         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5`       6.595e-02  1.161e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5`             NA         NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5`      2.866e+00  9.585e-01
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5`             NA         NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5`      9.570e-01  2.942e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5`         NA         NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5`         NA         NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5`            NA         NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5`     1.658e+00  1.544e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6`             NA         NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6`             NA         NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6`              NA         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6`              NA         NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6`              NA         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6`      -1.513e+00  1.413e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6`             NA         NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6`      2.573e-01  1.439e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6`             NA         NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6`             NA         NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6`         NA         NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6`         NA         NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6`            NA         NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6`     7.561e-01  1.431e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7`             NA         NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7`             NA         NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7`              NA         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7`              NA         NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7`              NA         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7`              NA         NA
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7`             NA         NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7`     -1.582e+01  2.280e+03
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7`             NA         NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7`             NA         NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7`         NA         NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7`         NA         NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7`            NA         NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7`    -3.387e-01  2.179e+00
##                                                    z value Pr(>|z|)    
## (Intercept)                                         -1.103  0.27013    
## biddable                                            13.530  < 2e-16 ***
## D.npnct15.log                                        1.815  0.06953 .  
## D.npnct03.log                                        0.215  0.82984    
## D.terms.n.stem.stop.Ratio                            0.788  0.43048    
## D.ratio.sum.TfIdf.nwrds                             -1.073  0.28314    
## D.npnct01.log                                        0.341  0.73288    
## .rnorm                                               0.067  0.94693    
## D.TfIdf.sum.stem.stop.Ratio                          0.292  0.76997    
## storage.fctr16                                      -0.214  0.83066    
## storage.fctr32                                      -0.345  0.72987    
## storage.fctr64                                       0.644  0.51952    
## storage.fctrUnknown                                  0.381  0.70288    
## D.npnct11.log                                        0.681  0.49558    
## D.npnct10.log                                       -0.779  0.43605    
## `prdl.my.descr.fctrUnknown#1`                        0.636  0.52476    
## `prdl.my.descr.fctriPad 1#0`                         1.285  0.19869    
## `prdl.my.descr.fctriPad 1#1`                         1.985  0.04719 *  
## `prdl.my.descr.fctriPad 2#0`                         2.545  0.01093 *  
## `prdl.my.descr.fctriPad 2#1`                         1.584  0.11317    
## `prdl.my.descr.fctriPad 3+#0`                        2.135  0.03280 *  
## `prdl.my.descr.fctriPad 3+#1`                        0.746  0.45540    
## `prdl.my.descr.fctriPadAir#0`                        0.664  0.50697    
## `prdl.my.descr.fctriPadAir#1`                        1.566  0.11735    
## `prdl.my.descr.fctriPadmini 2+#0`                    0.381  0.70337    
## `prdl.my.descr.fctriPadmini 2+#1`                    1.095  0.27357    
## `prdl.my.descr.fctriPadmini#0`                       1.085  0.27801    
## `prdl.my.descr.fctriPadmini#1`                       1.237  0.21601    
## color.fctrGold                                       0.061  0.95128    
## `color.fctrSpace Gray`                              -0.370  0.71149    
## color.fctrUnknown                                   -1.062  0.28845    
## color.fctrWhite                                     -0.820  0.41217    
## D.npnct08.log                                        0.792  0.42810    
## D.npnct06.log                                       -2.099  0.03581 *  
## D.npnct28.log                                       -0.001  0.99932    
## D.npnct12.log                                       -1.036  0.29997    
## D.npnct09.log                                       -0.003  0.99741    
## D.ndgts.log                                          1.518  0.12907    
## cellular.fctr1                                      -0.159  0.87385    
## cellular.fctrUnknown                                -1.341  0.17992    
## D.npnct14.log                                       -1.101  0.27106    
## D.terms.n.post.stop                                 -1.987  0.04690 *  
## D.npnct05.log                                       -2.089  0.03667 *  
## `condition.fctrFor parts or not working`             1.554  0.12020    
## `condition.fctrManufacturer refurbished`             1.435  0.15132    
## condition.fctrNew                                   -0.919  0.35786    
## `condition.fctrNew other (see details)`              1.668  0.09536 .  
## `condition.fctrSeller refurbished`                  -1.411  0.15819    
## idseq.my                                            -0.586  0.55761    
## startprice.diff                                     -8.754  < 2e-16 ***
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2`          NA       NA    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2`       1.639  0.10132    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2`           NA       NA    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2`        0.058  0.95335    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2`           NA       NA    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2`       -0.093  0.92595    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2`          NA       NA    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2`       1.256  0.20896    
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2`          NA       NA    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2`       0.092  0.92675    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2`      NA       NA    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2`   0.318  0.75071    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2`         NA       NA    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2`      0.045  0.96434    
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3`          NA       NA    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3`       0.566  0.57131    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3`           NA       NA    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3`        0.061  0.95156    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3`           NA       NA    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3`       -0.454  0.64958    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3`          NA       NA    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3`       0.339  0.73473    
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3`          NA       NA    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3`      -0.523  0.60112    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3`      NA       NA    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3`   0.335  0.73727    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3`         NA       NA    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3`      0.220  0.82603    
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4`          NA       NA    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4`       0.007  0.99461    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4`           NA       NA    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4`        0.061  0.95099    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4`           NA       NA    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4`        0.549  0.58330    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4`          NA       NA    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4`      -0.017  0.98631    
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4`          NA       NA    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4`      -0.532  0.59484    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4`      NA       NA    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4`  -0.004  0.99711    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4`         NA       NA    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4`      0.011  0.99109    
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5`          NA       NA    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5`          NA       NA    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5`           NA       NA    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5`        0.012  0.99043    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5`           NA       NA    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5`        0.057  0.95471    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5`          NA       NA    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5`       2.990  0.00279 ** 
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5`          NA       NA    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5`       0.325  0.74495    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5`      NA       NA    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5`      NA       NA    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5`         NA       NA    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5`      1.073  0.28307    
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6`          NA       NA    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6`          NA       NA    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6`           NA       NA    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6`           NA       NA    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6`           NA       NA    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6`       -1.071  0.28408    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6`          NA       NA    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6`       0.179  0.85809    
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6`          NA       NA    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6`          NA       NA    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6`      NA       NA    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6`      NA       NA    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6`         NA       NA    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6`      0.529  0.59712    
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7`          NA       NA    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7`          NA       NA    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7`           NA       NA    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7`           NA       NA    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7`           NA       NA    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7`           NA       NA    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7`          NA       NA    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7`      -0.007  0.99446    
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7`          NA       NA    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7`          NA       NA    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7`      NA       NA    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7`      NA       NA    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7`         NA       NA    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7`     -0.155  0.87647    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1337.51  on 968  degrees of freedom
## Residual deviance:  752.13  on 888  degrees of freedom
## AIC: 914.13
## 
## Number of Fisher Scoring iterations: 16
## 
## [1] "    calling mypredict_mdl for fit:"
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

##    threshold   f.score
## 1        0.0 0.6313559
## 2        0.1 0.7225914
## 3        0.2 0.7670980
## 4        0.3 0.8012295
## 5        0.4 0.8096810
## 6        0.5 0.8101852
## 7        0.6 0.8043478
## 8        0.7 0.7863464
## 9        0.8 0.7419355
## 10       0.9 0.5669291
## 11       1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.Low.cor.X.glm.N
## 1         N                               455
## 2         Y                                97
##   sold.fctr.predict.Low.cor.X.glm.Y
## 1                                67
## 2                               350
##          Prediction
## Reference   N   Y
##         N 455  67
##         Y  97 350
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.307534e-01   6.578169e-01   8.056321e-01   8.538452e-01   5.386997e-01 
## AccuracyPValue  McnemarPValue 
##   4.228202e-82   2.354218e-02 
## [1] "    calling mypredict_mdl for OOB:"
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

##    threshold   f.score
## 1        0.0 0.6339217
## 2        0.1 0.6990291
## 3        0.2 0.7308838
## 4        0.3 0.7407407
## 5        0.4 0.7482517
## 6        0.5 0.7600487
## 7        0.6 0.7582697
## 8        0.7 0.7456258
## 9        0.8 0.6577778
## 10       0.9 0.4745167
## 11       1.0 0.0000000

## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.OOB"
##   sold.fctr sold.fctr.predict.Low.cor.X.glm.N
## 1         N                               381
## 2         Y                               101
##   sold.fctr.predict.Low.cor.X.glm.Y
## 1                                96
## 2                               312
##          Prediction
## Reference   N   Y
##         N 381  96
##         Y 101 312
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.786517e-01   5.546405e-01   7.499158e-01   8.055293e-01   5.359551e-01 
## AccuracyPValue  McnemarPValue 
##   3.954556e-51   7.756532e-01 
##        model_id model_method
## 1 Low.cor.X.glm          glm
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                 feats
## 1 biddable, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, prdl.my.descr.fctr, color.fctr, D.npnct08.log, D.npnct06.log, D.npnct28.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
##   max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1               1                      1.741                  0.39
##   max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1   0.9028388                    0.5       0.8101852        0.7688338
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1             0.8056321             0.8538452     0.5344407   0.8382546
##   opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1                    0.5       0.7600487        0.7786517
##   max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB min.aic.fit
## 1             0.7499158             0.8055293     0.5546405     914.127
##   max.AccuracySD.fit max.KappaSD.fit
## 1         0.01966208      0.03929384
# The fit result list is not needed past this point; drop it from the workspace.
rm(ret_lst)

# Record a new "fit.models" step in the chunk-timing table. With
# major.inc=FALSE the printed table shows step_major unchanged and step_minor
# advanced, with the previous row's end/elapsed filled in.
glb_chunks_df <- myadd_chunk(glb_chunks_df, "fit.models", major.inc=FALSE)
##         label step_major step_minor    bgn    end elapsed
## 10 fit.models          7          0 67.216 92.532  25.317
## 11 fit.models          7          1 92.533     NA      NA
# Start a separate timing table for the fit.models_1 stage; passing NULL as the
# existing table yields a table with a single "fit.models_1_bgn" row.
fit.models_1_chunk_df <- myadd_chunk(NULL, "fit.models_1_bgn")
##              label step_major step_minor   bgn end elapsed
## 1 fit.models_1_bgn          1          0 96.37  NA      NA
# Options:
#   1. rpart & rf manual tuning
#   2. rf without pca (default: with pca)

#stop("here"); sav_models_lst <- glb_models_lst; sav_models_df <- glb_models_df
#glb_models_lst <- sav_models_lst; glb_models_df <- sav_models_df

# All X that is not user excluded
# Fits every method in glb_models_method_vctr on two feature sets:
#   "All.X"          - every feature in glb_feats_df that is neither flagged
#                      myNearZV nor user-excluded (exclude.as.feat == 1)
#   "All.Interact.X" - the same set, with selected features replaced by
#                      interaction terms (see below)
for (model_id_pfx in c("All.X", "All.Interact.X")) {
#model_id_pfx <- "All.X"
    indep_vars_vctr <- subset(glb_feats_df, !myNearZV &
                                                (exclude.as.feat != 1))[, "id"]
    if (model_id_pfx == "All.Interact.X") {
        # !_sp
        # Replace each of these features by its interaction with the category
        #   variable (".fctr" is appended to glb_category_var if not already present)
        interact_vars_vctr <- c(
            "idseq.my", "D.ratio.sum.TfIdf.nwrds", "D.TfIdf.sum.stem.stop.Ratio",
            "D.npnct15.log", "D.npnct03.log", "D.nwrds.log", "D.nchrs.log")
        indep_vars_vctr <- union(setdiff(indep_vars_vctr, interact_vars_vctr),
                                paste(glb_category_var, interact_vars_vctr, 
                            sep=ifelse(grepl("\\.fctr", glb_category_var), "*", ".fctr*")))        
        # Replace these main effects by their pairwise interaction terms
        indep_vars_vctr <- union(setdiff(indep_vars_vctr, 
                        c("startprice.diff", "biddable", "cellular.fctr", "carrier.fctr")),
                            c("startprice.diff*biddable", "cellular.fctr*carrier.fctr"))
        ###
        # _sp only
#         interact_vars_vctr <- c(
#             "D.nchrs.log", "D.TfIdf.sum.stem.stop.Ratio", 
#                 "D.npnct16.log", "D.npnct01.log", "D.nstopwrds.log", "D.npnct08.log",
#                 "D.terms.n.post.stop", "D.terms.n.post.stem", 
#                                 "biddable", "condition.fctr", 
#                                 # "cellular.fctr", "carrier.fctr",
#                                 "color.fctr", "storage.fctr", "idseq.my")
#         indep_vars_vctr <- union(setdiff(indep_vars_vctr, interact_vars_vctr),
#                                 paste(glb_category_var, interact_vars_vctr, 
#                             sep=ifelse(grepl("\\.fctr", glb_category_var), "*", ".fctr*")))
#         indep_vars_vctr <- union(setdiff(indep_vars_vctr, 
#                         c("cellular.fctr", "carrier.fctr")),
#                             c("cellular.fctr*carrier.fctr"))                            
        ###        
    }
    indep_vars_vctr <- myadjust_interaction_feats(indep_vars_vctr)
    
    #stop("here")
    for (method in glb_models_method_vctr) {
        fit.models_1_chunk_df <- myadd_chunk(fit.models_1_chunk_df, 
                                    paste0("fit.models_1_", method), major.inc=TRUE)
        # Build the feature set for THIS method only. indep_vars_vctr itself must
        #   stay untouched: overwriting it here would silently drop .rnorm from
        #   every method that runs after rpart / rf, even though their model_id
        #   carries no ".no.rnorm" suffix.
        if (method %in% c("rpart", "rf")) {
            # rpart:    fubar's the tree
            # rf:       skip the scenario w/ .rnorm for speed
            mdl_indep_vars_vctr <- setdiff(indep_vars_vctr, c(".rnorm"))
            model_id <- paste0(model_id_pfx, ".no.rnorm")
        } else {
            mdl_indep_vars_vctr <- indep_vars_vctr
            model_id <- model_id_pfx
        }
        
        ret_lst <- myfit_mdl(model_id=model_id, model_method=method,
                                indep_vars_vctr=mdl_indep_vars_vctr,
                                model_type=glb_model_type,
                                rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out,
                                fit_df=glb_fitobs_df, OOB_df=glb_OOBobs_df,
                    n_cv_folds=glb_n_cv_folds, tune_models_df=glb_tune_models_df)
        
        # If All.X.glm is less accurate than Low.Cor.X.glm
        #   check NA coefficients & filter appropriate terms in indep_vars_vctr
    #     if (method == "glm") {
    #         orig_glm <- glb_models_lst[[paste0(model_id, ".", model_method)]]$finalModel
    #         orig_glm <- glb_models_lst[["All.X.glm"]]$finalModel; print(summary(orig_glm))
    #           vif_orig_glm <- vif(orig_glm); print(vif_orig_glm)
    #           print(vif_orig_glm[!is.na(vif_orig_glm) & (vif_orig_glm == Inf)])
    #           print(which.max(vif_orig_glm))
    #           print(sort(vif_orig_glm[vif_orig_glm >= 1.0e+03], decreasing=TRUE))
    #           glb_fitobs_df[c(1143, 3637, 3953, 4105), c("UniqueID", "Popular", "H.P.quandary", "Headline")]
    #           glb_feats_df[glb_feats_df$id %in% grep("[HSA]\\.nchrs.log", glb_feats_df$id, value=TRUE) | glb_feats_df$cor.high.X %in%    grep("[HSA]\\.nchrs.log", glb_feats_df$id, value=TRUE), ]
    #           glb_feats_df[glb_feats_df$id %in% grep("[HSA]\\.npnct14.log", glb_feats_df$id, value=TRUE) | glb_feats_df$cor.high.X %in%    grep("[HSA]\\.npnct14.log", glb_feats_df$id, value=TRUE), ]
    #           glb_feats_df[glb_feats_df$id %in% grep("[HSA]\\.T.scen", glb_feats_df$id, value=TRUE) | glb_feats_df$cor.high.X %in%         grep("[HSA]\\.T.scen", glb_feats_df$id, value=TRUE), ]
    #           glb_feats_df[glb_feats_df$id %in% grep("[HSA]\\.P.first", glb_feats_df$id, value=TRUE) | glb_feats_df$cor.high.X %in%         grep("[HSA]\\.P.first", glb_feats_df$id, value=TRUE), ]
    #           all.equal(glb_allobs_df$S.nuppr.log, glb_allobs_df$A.nuppr.log)
    #           all.equal(glb_allobs_df$S.npnct19.log, glb_allobs_df$A.npnct19.log)
    #           all.equal(glb_allobs_df$S.P.year.colon, glb_allobs_df$A.P.year.colon)
    #           all.equal(glb_allobs_df$S.T.share, glb_allobs_df$A.T.share)
    #           all.equal(glb_allobs_df$H.T.clip, glb_allobs_df$H.P.daily.clip.report)
    #           cor(glb_allobs_df$S.T.herald, glb_allobs_df$S.T.tribun)
    #           mydsp_obs(Abstract.contains="[Dd]iar", cols=("Abstract"), all=TRUE)
    #           mydsp_obs(Abstract.contains="[Ss]hare", cols=("Abstract"), all=TRUE)
    #           subset(glb_feats_df, cor.y.abs <= glb_feats_df[glb_feats_df$id == ".rnorm", "cor.y.abs"])
    #         corxx_mtrx <- cor(data.matrix(glb_allobs_df[, setdiff(names(glb_allobs_df), myfind_chr_cols_df(glb_allobs_df))]), use="pairwise.complete.obs"); abs_corxx_mtrx <- abs(corxx_mtrx); diag(abs_corxx_mtrx) <- 0
    #           which.max(abs_corxx_mtrx["S.T.tribun", ])
    #           abs_corxx_mtrx["A.npnct08.log", "S.npnct08.log"]
    #         step_glm <- step(orig_glm)
    #     }
        # Since caret does not optimize rpart well
    #     if (method == "rpart")
    #         ret_lst <- myfit_mdl(model_id=paste0(model_id_pfx, ".cp.0"), model_method=method,
    #                                 indep_vars_vctr=mdl_indep_vars_vctr,
    #                                 model_type=glb_model_type,
    #                                 rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out,
    #                                 fit_df=glb_fitobs_df, OOB_df=glb_OOBobs_df,        
    #             n_cv_folds=0, tune_models_df=data.frame(parameter="cp", min=0.0, max=0.0, by=0.1))
    }
}
##              label step_major step_minor    bgn    end elapsed
## 1 fit.models_1_bgn          1          0 96.370 96.379   0.009
## 2 fit.models_1_glm          2          0 96.379     NA      NA
## [1] "fitting model: All.X.glm"
## [1] "    indep_vars: biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr"
## Aggregating results
## Fitting final model on full training set
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: not plotting observations with leverage one:
##   354, 619, 935

## Warning: not plotting observations with leverage one:
##   354, 619, 935

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5545  -0.6184  -0.0759   0.4772   3.3084  
## 
## Coefficients: (57 not defined because of singularities)
##                                                      Estimate Std. Error
## (Intercept)                                         8.072e+02  3.499e+03
## biddable                                            3.142e+00  2.355e-01
## D.ratio.nstopwrds.nwrds                            -1.480e+01  7.692e+00
## D.npnct15.log                                       1.196e+00  9.929e-01
## D.npnct03.log                                       6.005e-01  1.800e+00
## D.terms.n.stem.stop.Ratio                          -7.755e+02  3.497e+03
## D.ratio.sum.TfIdf.nwrds                            -1.552e+00  1.278e+00
## D.npnct01.log                                      -2.181e-01  9.103e-01
## .rnorm                                              1.393e-02  9.744e-02
## D.TfIdf.sum.stem.stop.Ratio                        -1.866e+01  2.335e+01
## storage.fctr16                                     -1.068e-01  5.884e-01
## storage.fctr32                                     -2.156e-01  6.139e-01
## storage.fctr64                                      4.166e-01  6.179e-01
## storage.fctrUnknown                                 2.144e-01  7.416e-01
## D.npnct11.log                                       1.743e-01  4.535e-01
## D.npnct10.log                                      -9.990e-01  2.020e+00
## D.TfIdf.sum.post.stop                              -2.801e+00  3.350e+00
## D.TfIdf.sum.post.stem                               3.311e+00  3.511e+00
## D.sum.TfIdf                                                NA         NA
## `prdl.my.descr.fctrUnknown#1`                      -9.192e-01  6.700e+00
## `prdl.my.descr.fctriPad 1#0`                        7.271e-01  5.812e-01
## `prdl.my.descr.fctriPad 1#1`                        6.051e-01  6.663e+00
## `prdl.my.descr.fctriPad 2#0`                        1.585e+00  6.915e-01
## `prdl.my.descr.fctriPad 2#1`                       -2.572e-01  6.637e+00
## `prdl.my.descr.fctriPad 3+#0`                       1.166e+00  5.560e-01
## `prdl.my.descr.fctriPad 3+#1`                      -1.127e+00  6.652e+00
## `prdl.my.descr.fctriPadAir#0`                       3.253e-01  5.328e-01
## `prdl.my.descr.fctriPadAir#1`                       3.825e-02  6.648e+00
## `prdl.my.descr.fctriPadmini 2+#0`                   1.514e-01  5.438e-01
## `prdl.my.descr.fctriPadmini 2+#1`                   8.256e-02  6.674e+00
## `prdl.my.descr.fctriPadmini#0`                      4.872e-01  5.278e-01
## `prdl.my.descr.fctriPadmini#1`                     -7.116e-01  6.725e+00
## D.npnct13.log                                      -4.193e-01  4.541e-01
## color.fctrGold                                     -6.916e-02  5.613e-01
## `color.fctrSpace Gray`                             -1.319e-01  3.886e-01
## color.fctrUnknown                                  -3.560e-01  2.815e-01
## color.fctrWhite                                    -2.679e-01  3.073e-01
## D.npnct08.log                                       4.819e-01  8.435e-01
## D.npnct16.log                                       1.740e+00  2.224e+00
## D.npnct24.log                                              NA         NA
## D.nstopwrds.log                                     5.020e+00  2.342e+00
## D.npnct06.log                                      -3.917e+00  2.377e+00
## D.npnct28.log                                      -3.344e+00  3.310e+03
## D.nuppr.log                                        -2.370e+00  5.392e+00
## D.nchrs.log                                         1.963e+00  6.151e+00
## D.nwrds.log                                        -5.166e+00  3.283e+00
## D.npnct12.log                                      -7.432e-01  8.740e-01
## carrier.fctrNone                                    5.695e-01  7.034e-01
## carrier.fctrOther                                   3.091e+01  5.595e+03
## carrier.fctrSprint                                  2.279e-01  7.981e-01
## `carrier.fctrT-Mobile`                             -6.923e-01  1.084e+00
## carrier.fctrUnknown                                -3.964e-02  5.369e-01
## carrier.fctrVerizon                                 8.074e-01  4.802e-01
## D.npnct09.log                                      -9.223e+00  2.854e+03
## D.ndgts.log                                         3.259e-01  6.455e-01
## D.nwrds.unq.log                                     8.435e+02  3.885e+03
## D.terms.n.post.stem.log                                    NA         NA
## D.terms.n.post.stop.log                            -8.454e+02  3.884e+03
## cellular.fctr1                                      3.655e-01  6.355e-01
## cellular.fctrUnknown                                       NA         NA
## D.npnct14.log                                      -1.486e+00  1.009e+00
## D.terms.n.post.stem                                -2.330e+00  2.038e+01
## D.terms.n.post.stop                                 2.348e+00  2.032e+01
## D.npnct05.log                                      -2.742e+00  1.866e+00
## `condition.fctrFor parts or not working`            3.423e-01  4.096e-01
## `condition.fctrManufacturer refurbished`            6.185e-01  5.963e-01
## condition.fctrNew                                  -3.135e-01  3.180e-01
## `condition.fctrNew other (see details)`             7.964e-01  5.189e-01
## `condition.fctrSeller refurbished`                 -6.440e-01  4.927e-01
## idseq.my                                           -7.067e-05  2.192e-04
## startprice.diff                                    -1.398e-02  1.599e-03
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2`             NA         NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2`      1.085e+00  1.113e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2`              NA         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2`      -1.294e-01  1.119e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2`              NA         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2`      -1.512e-01  9.948e-01
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2`             NA         NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2`      1.182e+00  8.861e-01
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2`             NA         NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2`      3.162e-01  8.389e-01
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2`         NA         NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2`  1.056e-01  2.085e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2`            NA         NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2`     9.002e-01  1.433e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3`             NA         NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3`      5.414e-01  1.927e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3`              NA         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3`       5.560e-02  1.276e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3`              NA         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3`      -3.174e-01  1.343e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3`             NA         NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3`      3.515e-01  1.499e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3`             NA         NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3`     -4.227e-01  1.243e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3`         NA         NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3` -8.845e-02  2.347e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3`            NA         NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3`     9.996e-01  1.384e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4`             NA         NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4`      1.764e+01  2.704e+03
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4`              NA         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4`      -2.491e-01  1.209e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4`              NA         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4`       9.901e-01  1.112e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4`             NA         NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4`     -1.565e+01  7.660e+02
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4`             NA         NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4`     -9.742e-01  1.210e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4`         NA         NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4` -1.495e+01  3.956e+03
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4`            NA         NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4`     1.781e+01  1.556e+03
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5`             NA         NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5`             NA         NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5`              NA         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5`       1.723e+01  1.420e+03
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5`              NA         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5`      -6.353e-02  1.119e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5`             NA         NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5`      2.672e+00  1.017e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5`             NA         NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5`     -1.423e+01  3.956e+03
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5`         NA         NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5`         NA         NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5`            NA         NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5`     1.985e+00  1.643e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6`             NA         NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6`             NA         NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6`              NA         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6`              NA         NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6`              NA         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6`      -1.782e+00  1.426e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6`             NA         NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6`      7.513e-01  1.494e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6`             NA         NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6`             NA         NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6`         NA         NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6`         NA         NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6`            NA         NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6`     1.089e+00  1.582e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7`             NA         NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7`             NA         NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7`              NA         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7`              NA         NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7`              NA         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7`              NA         NA
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7`             NA         NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7`     -1.467e+01  2.282e+03
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7`             NA         NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7`             NA         NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7`         NA         NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7`         NA         NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7`            NA         NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7`    -4.614e-01  2.497e+00
##                                                    z value Pr(>|z|)    
## (Intercept)                                          0.231  0.81756    
## biddable                                            13.345  < 2e-16 ***
## D.ratio.nstopwrds.nwrds                             -1.924  0.05441 .  
## D.npnct15.log                                        1.205  0.22820    
## D.npnct03.log                                        0.334  0.73868    
## D.terms.n.stem.stop.Ratio                           -0.222  0.82450    
## D.ratio.sum.TfIdf.nwrds                             -1.215  0.22430    
## D.npnct01.log                                       -0.240  0.81064    
## .rnorm                                               0.143  0.88630    
## D.TfIdf.sum.stem.stop.Ratio                         -0.799  0.42415    
## storage.fctr16                                      -0.182  0.85591    
## storage.fctr32                                      -0.351  0.72549    
## storage.fctr64                                       0.674  0.50022    
## storage.fctrUnknown                                  0.289  0.77249    
## D.npnct11.log                                        0.384  0.70078    
## D.npnct10.log                                       -0.494  0.62100    
## D.TfIdf.sum.post.stop                               -0.836  0.40318    
## D.TfIdf.sum.post.stem                                0.943  0.34559    
## D.sum.TfIdf                                             NA       NA    
## `prdl.my.descr.fctrUnknown#1`                       -0.137  0.89089    
## `prdl.my.descr.fctriPad 1#0`                         1.251  0.21092    
## `prdl.my.descr.fctriPad 1#1`                         0.091  0.92764    
## `prdl.my.descr.fctriPad 2#0`                         2.292  0.02193 *  
## `prdl.my.descr.fctriPad 2#1`                        -0.039  0.96909    
## `prdl.my.descr.fctriPad 3+#0`                        2.098  0.03594 *  
## `prdl.my.descr.fctriPad 3+#1`                       -0.169  0.86549    
## `prdl.my.descr.fctriPadAir#0`                        0.611  0.54141    
## `prdl.my.descr.fctriPadAir#1`                        0.006  0.99541    
## `prdl.my.descr.fctriPadmini 2+#0`                    0.278  0.78069    
## `prdl.my.descr.fctriPadmini 2+#1`                    0.012  0.99013    
## `prdl.my.descr.fctriPadmini#0`                       0.923  0.35601    
## `prdl.my.descr.fctriPadmini#1`                      -0.106  0.91573    
## D.npnct13.log                                       -0.923  0.35588    
## color.fctrGold                                      -0.123  0.90193    
## `color.fctrSpace Gray`                              -0.340  0.73420    
## color.fctrUnknown                                   -1.265  0.20593    
## color.fctrWhite                                     -0.872  0.38329    
## D.npnct08.log                                        0.571  0.56777    
## D.npnct16.log                                        0.783  0.43385    
## D.npnct24.log                                           NA       NA    
## D.nstopwrds.log                                      2.143  0.03208 *  
## D.npnct06.log                                       -1.648  0.09933 .  
## D.npnct28.log                                       -0.001  0.99919    
## D.nuppr.log                                         -0.440  0.66029    
## D.nchrs.log                                          0.319  0.74966    
## D.nwrds.log                                         -1.573  0.11562    
## D.npnct12.log                                       -0.850  0.39510    
## carrier.fctrNone                                     0.810  0.41817    
## carrier.fctrOther                                    0.006  0.99559    
## carrier.fctrSprint                                   0.286  0.77521    
## `carrier.fctrT-Mobile`                              -0.639  0.52313    
## carrier.fctrUnknown                                 -0.074  0.94114    
## carrier.fctrVerizon                                  1.681  0.09271 .  
## D.npnct09.log                                       -0.003  0.99742    
## D.ndgts.log                                          0.505  0.61369    
## D.nwrds.unq.log                                      0.217  0.82810    
## D.terms.n.post.stem.log                                 NA       NA    
## D.terms.n.post.stop.log                             -0.218  0.82769    
## cellular.fctr1                                       0.575  0.56524    
## cellular.fctrUnknown                                    NA       NA    
## D.npnct14.log                                       -1.472  0.14099    
## D.terms.n.post.stem                                 -0.114  0.90897    
## D.terms.n.post.stop                                  0.116  0.90801    
## D.npnct05.log                                       -1.470  0.14168    
## `condition.fctrFor parts or not working`             0.836  0.40331    
## `condition.fctrManufacturer refurbished`             1.037  0.29965    
## condition.fctrNew                                   -0.986  0.32419    
## `condition.fctrNew other (see details)`              1.535  0.12484    
## `condition.fctrSeller refurbished`                  -1.307  0.19125    
## idseq.my                                            -0.322  0.74722    
## startprice.diff                                     -8.746  < 2e-16 ***
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2`          NA       NA    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2`       0.975  0.32964    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2`           NA       NA    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2`       -0.116  0.90797    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2`           NA       NA    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2`       -0.152  0.87917    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2`          NA       NA    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2`       1.334  0.18209    
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2`          NA       NA    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2`       0.377  0.70623    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2`      NA       NA    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2`   0.051  0.95960    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2`         NA       NA    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2`      0.628  0.52982    
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3`          NA       NA    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3`       0.281  0.77878    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3`           NA       NA    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3`        0.044  0.96525    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3`           NA       NA    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3`       -0.236  0.81313    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3`          NA       NA    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3`       0.234  0.81462    
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3`          NA       NA    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3`      -0.340  0.73372    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3`      NA       NA    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3`  -0.038  0.96994    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3`         NA       NA    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3`      0.722  0.47007    
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4`          NA       NA    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4`       0.007  0.99479    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4`           NA       NA    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4`       -0.206  0.83679    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4`           NA       NA    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4`        0.890  0.37346    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4`          NA       NA    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4`      -0.020  0.98370    
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4`          NA       NA    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4`      -0.805  0.42087    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4`      NA       NA    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4`  -0.004  0.99699    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4`         NA       NA    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4`      0.011  0.99087    
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5`          NA       NA    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5`          NA       NA    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5`           NA       NA    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5`        0.012  0.99031    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5`           NA       NA    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5`       -0.057  0.95473    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5`          NA       NA    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5`       2.627  0.00862 ** 
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5`          NA       NA    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5`      -0.004  0.99713    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5`      NA       NA    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5`      NA       NA    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5`         NA       NA    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5`      1.208  0.22690    
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6`          NA       NA    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6`          NA       NA    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6`           NA       NA    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6`           NA       NA    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6`           NA       NA    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6`       -1.249  0.21150    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6`          NA       NA    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6`       0.503  0.61515    
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6`          NA       NA    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6`          NA       NA    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6`      NA       NA    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6`      NA       NA    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6`         NA       NA    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6`      0.688  0.49129    
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7`          NA       NA    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7`          NA       NA    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7`           NA       NA    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7`           NA       NA    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7`           NA       NA    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7`           NA       NA    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7`          NA       NA    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7`      -0.006  0.99487    
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7`          NA       NA    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7`          NA       NA    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7`      NA       NA    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7`      NA       NA    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7`         NA       NA    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7`     -0.185  0.85337    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1337.51  on 968  degrees of freedom
## Residual deviance:  735.56  on 871  degrees of freedom
## AIC: 931.56
## 
## Number of Fisher Scoring iterations: 16
## 
## [1] "    calling mypredict_mdl for fit:"
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

##    threshold   f.score
## 1        0.0 0.6313559
## 2        0.1 0.7250000
## 3        0.2 0.7715079
## 4        0.3 0.8016360
## 5        0.4 0.8171806
## 6        0.5 0.8258362
## 7        0.6 0.8082027
## 8        0.7 0.7812895
## 9        0.8 0.7466307
## 10       0.9 0.5978428
## 11       1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.All.X.glm.N sold.fctr.predict.All.X.glm.Y
## 1         N                           460                            62
## 2         Y                            89                           358
##          Prediction
## Reference   N   Y
##         N 460  62
##         Y  89 358
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.441692e-01   6.850937e-01   8.197763e-01   8.664485e-01   5.386997e-01 
## AccuracyPValue  McnemarPValue 
##   1.762753e-90   3.435757e-02 
## [1] "    calling mypredict_mdl for OOB:"
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

##    threshold   f.score
## 1        0.0 0.6339217
## 2        0.1 0.6984698
## 3        0.2 0.7223340
## 4        0.3 0.7372973
## 5        0.4 0.7520185
## 6        0.5 0.7545788
## 7        0.6 0.7512821
## 8        0.7 0.7361299
## 9        0.8 0.6764706
## 10       0.9 0.4939130
## 11       1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.OOB"
##   sold.fctr sold.fctr.predict.All.X.glm.N sold.fctr.predict.All.X.glm.Y
## 1         N                           380                            97
## 2         Y                           104                           309
##          Prediction
## Reference   N   Y
##         N 380  97
##         Y 104 309
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.741573e-01   5.454499e-01   7.452413e-01   8.012453e-01   5.359551e-01 
## AccuracyPValue  McnemarPValue 
##   3.249328e-49   6.721440e-01 
##    model_id model_method
## 1 All.X.glm          glm
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                feats
## 1 biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
##   max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1               1                      2.172                 0.506
##   max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1   0.9069917                    0.5       0.8258362        0.7647059
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1             0.8197763             0.8664485     0.5250728   0.8308232
##   opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1                    0.5       0.7545788        0.7741573
##   max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB min.aic.fit
## 1             0.7452413             0.8012453     0.5454499    931.5575
##   max.AccuracySD.fit max.KappaSD.fit
## 1          0.0298565      0.05995988
##                   label step_major step_minor     bgn     end elapsed
## 2      fit.models_1_glm          2          0  96.379 102.276   5.897
## 3 fit.models_1_bayesglm          3          0 102.277      NA      NA
## [1] "fitting model: All.X.bayesglm"
## [1] "    indep_vars: biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr"
## Loading required package: arm
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following object is masked from 'package:tidyr':
## 
##     expand
## 
## Loading required package: lme4
## 
## arm (Version 1.8-6, built: 2015-7-7)
## 
## Working directory is /Users/bbalaji-2012/Documents/Work/Courses/MIT/Analytics_Edge_15_071x/Assignments/Kaggle_eBay_iPads

## Aggregating results
## Fitting final model on full training set
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5556  -0.6302  -0.1345   0.5021   3.2260  
## 
## Coefficients:
##                                                      Estimate Std. Error
## (Intercept)                                        -4.347e+00  7.319e+00
## biddable                                            3.034e+00  2.212e-01
## D.ratio.nstopwrds.nwrds                            -2.211e+00  2.469e+00
## D.npnct15.log                                       1.351e+00  9.049e-01
## D.npnct03.log                                       4.958e-01  1.508e+00
## D.terms.n.stem.stop.Ratio                           5.028e+00  5.979e+00
## D.ratio.sum.TfIdf.nwrds                            -6.650e-01  5.501e-01
## D.npnct01.log                                       1.302e-01  7.572e-01
## .rnorm                                              1.010e-02  9.353e-02
## D.TfIdf.sum.stem.stop.Ratio                         2.588e-01  4.491e+00
## storage.fctr16                                     -1.628e-01  4.902e-01
## storage.fctr32                                     -2.673e-01  5.127e-01
## storage.fctr64                                      3.920e-01  5.190e-01
## storage.fctrUnknown                                 1.613e-01  6.285e-01
## D.npnct11.log                                       1.337e-01  3.826e-01
## D.npnct10.log                                      -8.824e-01  1.786e+00
## D.TfIdf.sum.post.stop                               6.645e-02  2.969e-01
## D.TfIdf.sum.post.stem                               1.028e-01  3.136e-01
## D.sum.TfIdf                                         1.028e-01  3.136e-01
## `prdl.my.descr.fctrUnknown#1`                      -3.698e-01  9.146e-01
## `prdl.my.descr.fctriPad 1#0`                        5.379e-01  5.099e-01
## `prdl.my.descr.fctriPad 1#1`                        7.844e-01  8.904e-01
## `prdl.my.descr.fctriPad 2#0`                        1.356e+00  6.123e-01
## `prdl.my.descr.fctriPad 2#1`                       -4.406e-02  8.439e-01
## `prdl.my.descr.fctriPad 3+#0`                       9.707e-01  4.918e-01
## `prdl.my.descr.fctriPad 3+#1`                      -6.777e-01  8.579e-01
## `prdl.my.descr.fctriPadAir#0`                       1.637e-01  4.631e-01
## `prdl.my.descr.fctriPadAir#1`                       1.365e-01  8.362e-01
## `prdl.my.descr.fctriPadmini 2+#0`                  -1.747e-03  4.773e-01
## `prdl.my.descr.fctriPadmini 2+#1`                   1.615e-01  1.053e+00
## `prdl.my.descr.fctriPadmini#0`                      3.233e-01  4.610e-01
## `prdl.my.descr.fctriPadmini#1`                      2.295e-01  9.325e-01
## D.npnct13.log                                      -2.517e-01  3.697e-01
## color.fctrGold                                      1.512e-03  5.147e-01
## `color.fctrSpace Gray`                             -1.545e-01  3.625e-01
## color.fctrUnknown                                  -2.979e-01  2.641e-01
## color.fctrWhite                                    -2.391e-01  2.882e-01
## D.npnct08.log                                       3.386e-01  8.009e-01
## D.npnct16.log                                       1.021e+00  1.873e+00
## D.npnct24.log                                       3.799e-01  2.625e+00
## D.nstopwrds.log                                     4.638e-01  6.751e-01
## D.npnct06.log                                      -2.869e+00  2.005e+00
## D.npnct28.log                                      -6.145e-02  2.225e+00
## D.nuppr.log                                        -1.093e-01  5.010e-01
## D.nchrs.log                                        -5.805e-02  4.869e-01
## D.nwrds.log                                        -1.877e-01  7.920e-01
## D.npnct12.log                                      -7.628e-01  8.262e-01
## carrier.fctrNone                                    2.354e-01  1.161e+00
## carrier.fctrOther                                   6.332e-01  1.954e+00
## carrier.fctrSprint                                  1.436e-01  7.235e-01
## `carrier.fctrT-Mobile`                             -5.694e-01  9.226e-01
## carrier.fctrUnknown                                -1.200e-01  4.841e-01
## carrier.fctrVerizon                                 7.147e-01  4.353e-01
## D.npnct09.log                                      -2.140e+00  7.211e+00
## D.ndgts.log                                         4.273e-01  4.240e-01
## D.nwrds.unq.log                                    -1.891e-01  1.025e+00
## D.terms.n.post.stem.log                            -1.891e-01  1.025e+00
## D.terms.n.post.stop.log                            -1.910e-01  1.022e+00
## cellular.fctr1                                      6.424e-02  1.152e+00
## cellular.fctrUnknown                               -3.051e-01  1.198e+00
## D.npnct14.log                                      -1.273e+00  8.973e-01
## D.terms.n.post.stem                                -6.700e-02  1.983e-01
## D.terms.n.post.stop                                -7.456e-02  1.963e-01
## D.npnct05.log                                      -2.540e+00  1.456e+00
## `condition.fctrFor parts or not working`            3.891e-01  3.820e-01
## `condition.fctrManufacturer refurbished`            5.998e-01  5.553e-01
## condition.fctrNew                                  -3.150e-01  3.061e-01
## `condition.fctrNew other (see details)`             7.596e-01  4.706e-01
## `condition.fctrSeller refurbished`                 -5.317e-01  4.469e-01
## idseq.my                                           -9.664e-05  2.094e-04
## startprice.diff                                    -1.343e-02  1.506e-03
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2`      0.000e+00  2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2`      8.178e-01  8.938e-01
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2`       0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2`      -1.855e-01  9.086e-01
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2`       0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2`      -5.915e-03  8.168e-01
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2`      0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2`      8.008e-01  7.351e-01
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2`      0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2`      2.902e-01  7.110e-01
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2`  0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2`  2.989e-01  1.294e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2`     0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2`    -5.684e-03  1.035e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3`      0.000e+00  2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3`      3.285e-01  1.393e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3`       0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3`       9.773e-02  1.006e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3`       0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3`      -3.715e-01  1.055e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3`      0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3`      7.797e-02  1.128e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3`      0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3`     -4.728e-01  1.024e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3`  0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3`  2.118e-01  1.466e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3`     0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3`     5.720e-02  9.629e-01
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4`      0.000e+00  2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4`      1.775e+00  1.620e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4`       0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4`      -4.368e-02  9.571e-01
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4`       0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4`       7.630e-01  9.247e-01
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4`      0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4`     -1.551e+00  1.582e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4`      0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4`     -6.328e-01  9.515e-01
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4`  0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4` -1.853e-01  2.163e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4`     0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4`     2.155e+00  1.635e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5`      0.000e+00  2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5`      0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5`       0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5`       2.208e+00  1.643e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5`       0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5`      -1.996e-01  9.382e-01
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5`      0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5`      2.387e+00  8.443e-01
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5`      0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5`      2.750e-01  1.769e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5`  0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5`  0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5`     0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5`     1.035e+00  1.160e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6`      0.000e+00  2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6`      0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6`       0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6`       0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6`       0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6`      -1.088e+00  1.076e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6`      0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6`      3.753e-01  1.151e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6`      0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6`      0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6`  0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6`  0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6`     0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6`     3.619e-01  1.096e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7`      0.000e+00  2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7`      0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7`       0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7`       0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7`       0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7`       0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7`      0.000e+00  2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7`     -7.199e-01  1.752e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7`      0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7`      0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7`  0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7`  0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7`     0.000e+00  2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7`    -3.962e-01  1.402e+00
##                                                    z value Pr(>|z|)    
## (Intercept)                                         -0.594   0.5525    
## biddable                                            13.712   <2e-16 ***
## D.ratio.nstopwrds.nwrds                             -0.895   0.3707    
## D.npnct15.log                                        1.494   0.1353    
## D.npnct03.log                                        0.329   0.7423    
## D.terms.n.stem.stop.Ratio                            0.841   0.4004    
## D.ratio.sum.TfIdf.nwrds                             -1.209   0.2267    
## D.npnct01.log                                        0.172   0.8635    
## .rnorm                                               0.108   0.9140    
## D.TfIdf.sum.stem.stop.Ratio                          0.058   0.9541    
## storage.fctr16                                      -0.332   0.7398    
## storage.fctr32                                      -0.521   0.6021    
## storage.fctr64                                       0.755   0.4501    
## storage.fctrUnknown                                  0.257   0.7974    
## D.npnct11.log                                        0.349   0.7268    
## D.npnct10.log                                       -0.494   0.6214    
## D.TfIdf.sum.post.stop                                0.224   0.8229    
## D.TfIdf.sum.post.stem                                0.328   0.7431    
## D.sum.TfIdf                                          0.328   0.7431    
## `prdl.my.descr.fctrUnknown#1`                       -0.404   0.6860    
## `prdl.my.descr.fctriPad 1#0`                         1.055   0.2915    
## `prdl.my.descr.fctriPad 1#1`                         0.881   0.3784    
## `prdl.my.descr.fctriPad 2#0`                         2.215   0.0268 *  
## `prdl.my.descr.fctriPad 2#1`                        -0.052   0.9584    
## `prdl.my.descr.fctriPad 3+#0`                        1.974   0.0484 *  
## `prdl.my.descr.fctriPad 3+#1`                       -0.790   0.4295    
## `prdl.my.descr.fctriPadAir#0`                        0.353   0.7238    
## `prdl.my.descr.fctriPadAir#1`                        0.163   0.8703    
## `prdl.my.descr.fctriPadmini 2+#0`                   -0.004   0.9971    
## `prdl.my.descr.fctriPadmini 2+#1`                    0.153   0.8781    
## `prdl.my.descr.fctriPadmini#0`                       0.701   0.4832    
## `prdl.my.descr.fctriPadmini#1`                       0.246   0.8056    
## D.npnct13.log                                       -0.681   0.4959    
## color.fctrGold                                       0.003   0.9977    
## `color.fctrSpace Gray`                              -0.426   0.6700    
## color.fctrUnknown                                   -1.128   0.2592    
## color.fctrWhite                                     -0.830   0.4067    
## D.npnct08.log                                        0.423   0.6725    
## D.npnct16.log                                        0.545   0.5857    
## D.npnct24.log                                        0.145   0.8849    
## D.nstopwrds.log                                      0.687   0.4921    
## D.npnct06.log                                       -1.431   0.1525    
## D.npnct28.log                                       -0.028   0.9780    
## D.nuppr.log                                         -0.218   0.8273    
## D.nchrs.log                                         -0.119   0.9051    
## D.nwrds.log                                         -0.237   0.8127    
## D.npnct12.log                                       -0.923   0.3558    
## carrier.fctrNone                                     0.203   0.8394    
## carrier.fctrOther                                    0.324   0.7459    
## carrier.fctrSprint                                   0.199   0.8426    
## `carrier.fctrT-Mobile`                              -0.617   0.5372    
## carrier.fctrUnknown                                 -0.248   0.8042    
## carrier.fctrVerizon                                  1.642   0.1006    
## D.npnct09.log                                       -0.297   0.7666    
## D.ndgts.log                                          1.008   0.3135    
## D.nwrds.unq.log                                     -0.185   0.8535    
## D.terms.n.post.stem.log                             -0.185   0.8535    
## D.terms.n.post.stop.log                             -0.187   0.8517    
## cellular.fctr1                                       0.056   0.9555    
## cellular.fctrUnknown                                -0.255   0.7989    
## D.npnct14.log                                       -1.418   0.1561    
## D.terms.n.post.stem                                 -0.338   0.7354    
## D.terms.n.post.stop                                 -0.380   0.7041    
## D.npnct05.log                                       -1.745   0.0810 .  
## `condition.fctrFor parts or not working`             1.019   0.3084    
## `condition.fctrManufacturer refurbished`             1.080   0.2801    
## condition.fctrNew                                   -1.029   0.3034    
## `condition.fctrNew other (see details)`              1.614   0.1065    
## `condition.fctrSeller refurbished`                  -1.190   0.2341    
## idseq.my                                            -0.462   0.6444    
## startprice.diff                                     -8.916   <2e-16 ***
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2`       0.000   1.0000    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2`       0.915   0.3602    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2`        0.000   1.0000    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2`       -0.204   0.8383    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2`        0.000   1.0000    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2`       -0.007   0.9942    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2`       0.000   1.0000    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2`       1.089   0.2760    
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2`       0.000   1.0000    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2`       0.408   0.6831    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2`   0.000   1.0000    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2`   0.231   0.8174    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2`      0.000   1.0000    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2`     -0.005   0.9956    
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3`       0.000   1.0000    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3`       0.236   0.8136    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3`        0.000   1.0000    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3`        0.097   0.9226    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3`        0.000   1.0000    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3`       -0.352   0.7248    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3`       0.000   1.0000    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3`       0.069   0.9449    
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3`       0.000   1.0000    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3`      -0.462   0.6441    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3`   0.000   1.0000    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3`   0.145   0.8851    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3`      0.000   1.0000    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3`      0.059   0.9526    
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4`       0.000   1.0000    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4`       1.095   0.2734    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4`        0.000   1.0000    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4`       -0.046   0.9636    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4`        0.000   1.0000    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4`        0.825   0.4093    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4`       0.000   1.0000    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4`      -0.981   0.3268    
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4`       0.000   1.0000    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4`      -0.665   0.5060    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4`   0.000   1.0000    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4`  -0.086   0.9317    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4`      0.000   1.0000    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4`      1.318   0.1875    
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5`       0.000   1.0000    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5`       0.000   1.0000    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5`        0.000   1.0000    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5`        1.344   0.1789    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5`        0.000   1.0000    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5`       -0.213   0.8315    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5`       0.000   1.0000    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5`       2.827   0.0047 ** 
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5`       0.000   1.0000    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5`       0.155   0.8765    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5`   0.000   1.0000    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5`   0.000   1.0000    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5`      0.000   1.0000    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5`      0.893   0.3721    
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6`       0.000   1.0000    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6`       0.000   1.0000    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6`        0.000   1.0000    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6`        0.000   1.0000    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6`        0.000   1.0000    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6`       -1.011   0.3119    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6`       0.000   1.0000    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6`       0.326   0.7444    
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6`       0.000   1.0000    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6`       0.000   1.0000    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6`   0.000   1.0000    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6`   0.000   1.0000    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6`      0.000   1.0000    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6`      0.330   0.7412    
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7`       0.000   1.0000    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7`       0.000   1.0000    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7`        0.000   1.0000    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7`        0.000   1.0000    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7`        0.000   1.0000    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7`        0.000   1.0000    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7`       0.000   1.0000    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7`      -0.411   0.6812    
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7`       0.000   1.0000    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7`       0.000   1.0000    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7`   0.000   1.0000    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7`   0.000   1.0000    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7`      0.000   1.0000    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7`     -0.283   0.7775    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1337.51  on 968  degrees of freedom
## Residual deviance:  746.68  on 814  degrees of freedom
## AIC: 1056.7
## 
## Number of Fisher Scoring iterations: 18
## 
## [1] "    calling mypredict_mdl for fit:"

##    threshold   f.score
## 1        0.0 0.6313559
## 2        0.1 0.7241094
## 3        0.2 0.7644363
## 4        0.3 0.8008172
## 5        0.4 0.8105148
## 6        0.5 0.8175520
## 7        0.6 0.8029021
## 8        0.7 0.7888748
## 9        0.8 0.7449393
## 10       0.9 0.5495208
## 11       1.0 0.0000000

## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.All.X.bayesglm.N
## 1         N                                457
## 2         Y                                 93
##   sold.fctr.predict.All.X.bayesglm.Y
## 1                                 65
## 2                                354
##          Prediction
## Reference   N   Y
##         N 457  65
##         Y  93 354
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.369453e-01   6.704422e-01   8.121533e-01   8.596691e-01   5.386997e-01 
## AccuracyPValue  McnemarPValue 
##   6.718903e-86   3.171338e-02 
## [1] "    calling mypredict_mdl for OOB:"

##    threshold   f.score
## 1        0.0 0.6339217
## 2        0.1 0.6983289
## 3        0.2 0.7231527
## 4        0.3 0.7378641
## 5        0.4 0.7551963
## 6        0.5 0.7641278
## 7        0.6 0.7593308
## 8        0.7 0.7472826
## 9        0.8 0.6775148
## 10       0.9 0.4436364
## 11       1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.OOB"
##   sold.fctr sold.fctr.predict.All.X.bayesglm.N
## 1         N                                387
## 2         Y                                102
##   sold.fctr.predict.All.X.bayesglm.Y
## 1                                 90
## 2                                311
##          Prediction
## Reference   N   Y
##         N 387  90
##         Y 102 311
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.842697e-01   5.654496e-01   7.557654e-01   8.108777e-01   5.359551e-01 
## AccuracyPValue  McnemarPValue 
##   1.381223e-53   4.272789e-01 
##         model_id model_method
## 1 All.X.bayesglm     bayesglm
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                feats
## 1 biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
##   max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1               1                      3.299                 0.772
##   max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1   0.9042146                    0.5        0.817552        0.7770898
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1             0.8121533             0.8596691     0.5506703   0.8427064
##   opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1                    0.5       0.7641278        0.7842697
##   max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB min.aic.fit
## 1             0.7557654             0.8108777     0.5654496    1056.676
##   max.AccuracySD.fit max.KappaSD.fit
## 1         0.03447532      0.06919184
##                   label step_major step_minor     bgn     end elapsed
## 3 fit.models_1_bayesglm          3          0 102.277 108.717    6.44
## 4   fit.models_1_glmnet          4          0 108.718      NA      NA
## [1] "fitting model: All.X.glmnet"
## [1] "    indep_vars: biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr"
## Loading required package: glmnet
## Loaded glmnet 2.0-2

## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.55, lambda = 0.0559 on full training set
## Warning in myfit_mdl(model_id = model_id, model_method = method,
## indep_vars_vctr = indep_vars_vctr, : model's bestTune found at an extreme
## of tuneGrid for parameter: lambda

##             Length Class      Mode     
## a0             94  -none-     numeric  
## beta        14476  dgCMatrix  S4       
## df             94  -none-     numeric  
## dim             2  -none-     numeric  
## lambda         94  -none-     numeric  
## dev.ratio      94  -none-     numeric  
## nulldev         1  -none-     numeric  
## npasses         1  -none-     numeric  
## jerr            1  -none-     numeric  
## offset          1  -none-     logical  
## classnames      2  -none-     character
## call            5  -none-     call     
## nobs            1  -none-     numeric  
## lambdaOpt       1  -none-     numeric  
## xNames        154  -none-     character
## problemType     1  -none-     character
## tuneValue       2  data.frame list     
## obsLevels       2  -none-     character
## [1] "min lambda > lambdaOpt:"
##                 (Intercept)                    biddable 
##                -0.899322561                 2.001584168 
##  prdl.my.descr.fctriPad 2#0 prdl.my.descr.fctriPad 3+#0 
##                 0.290095210                 0.105586993 
##         D.terms.n.post.stem         D.terms.n.post.stop 
##                -0.001144834                -0.006953112 
##               D.npnct05.log           condition.fctrNew 
##                -0.404659882                -0.113024355 
##             startprice.diff 
##                -0.005271528 
## [1] "max lambda < lambdaOpt:"
##                                      (Intercept) 
##                                     2.036899e+00 
##                                         biddable 
##                                     3.108468e+00 
##                          D.ratio.nstopwrds.nwrds 
##                                    -7.610486e+00 
##                                    D.npnct15.log 
##                                     1.337715e+00 
##                                    D.npnct03.log 
##                                     5.644884e-01 
##                        D.terms.n.stem.stop.Ratio 
##                                     3.877586e+00 
##                          D.ratio.sum.TfIdf.nwrds 
##                                    -9.955486e-01 
##                                    D.npnct01.log 
##                                    -6.912816e-02 
##                                           .rnorm 
##                                     1.200630e-02 
##                      D.TfIdf.sum.stem.stop.Ratio 
##                                     2.583627e-01 
##                                   storage.fctr16 
##                                    -1.492889e-01 
##                                   storage.fctr32 
##                                    -2.538723e-01 
##                                   storage.fctr64 
##                                     3.898066e-01 
##                              storage.fctrUnknown 
##                                     2.109165e-01 
##                                    D.npnct11.log 
##                                     1.336271e-01 
##                                    D.npnct10.log 
##                                    -1.008654e+00 
##                            D.TfIdf.sum.post.stem 
##                                     1.740092e-01 
##                                      D.sum.TfIdf 
##                                     1.604829e-01 
##                      prdl.my.descr.fctrUnknown#1 
##                                    -7.484133e-01 
##                       prdl.my.descr.fctriPad 1#0 
##                                     7.248021e-01 
##                       prdl.my.descr.fctriPad 1#1 
##                                     7.365368e-01 
##                       prdl.my.descr.fctriPad 2#0 
##                                     1.600105e+00 
##                      prdl.my.descr.fctriPad 3+#0 
##                                     1.164531e+00 
##                      prdl.my.descr.fctriPad 3+#1 
##                                    -8.444403e-01 
##                      prdl.my.descr.fctriPadAir#0 
##                                     3.271335e-01 
##                      prdl.my.descr.fctriPadAir#1 
##                                     2.013535e-01 
##                  prdl.my.descr.fctriPadmini 2+#0 
##                                     1.593440e-01 
##                  prdl.my.descr.fctriPadmini 2+#1 
##                                     2.251656e-01 
##                     prdl.my.descr.fctriPadmini#0 
##                                     4.908239e-01 
##                     prdl.my.descr.fctriPadmini#1 
##                                    -3.052192e-01 
##                                    D.npnct13.log 
##                                    -3.497295e-01 
##                                   color.fctrGold 
##                                    -3.755518e-02 
##                             color.fctrSpace Gray 
##                                    -1.279132e-01 
##                                color.fctrUnknown 
##                                    -3.204201e-01 
##                                  color.fctrWhite 
##                                    -2.602917e-01 
##                                    D.npnct08.log 
##                                     4.963979e-01 
##                                    D.npnct16.log 
##                                     1.398245e+00 
##                                  D.nstopwrds.log 
##                                     2.185585e+00 
##                                    D.npnct06.log 
##                                    -3.485130e+00 
##                                    D.npnct28.log 
##                                    -1.526272e+00 
##                                      D.nuppr.log 
##                                    -5.352202e-01 
##                                      D.nchrs.log 
##                                    -1.121831e-04 
##                                      D.nwrds.log 
##                                    -1.475618e+00 
##                                    D.npnct12.log 
##                                    -7.092382e-01 
##                                 carrier.fctrNone 
##                                     1.997221e-01 
##                                carrier.fctrOther 
##                                     7.534329e+00 
##                               carrier.fctrSprint 
##                                     1.776846e-01 
##                             carrier.fctrT-Mobile 
##                                    -7.284659e-01 
##                              carrier.fctrUnknown 
##                                    -6.448419e-02 
##                              carrier.fctrVerizon 
##                                     7.545986e-01 
##                                    D.npnct09.log 
##                                    -1.853972e+00 
##                                      D.ndgts.log 
##                                     4.390575e-01 
##                                  D.nwrds.unq.log 
##                                    -1.757319e-01 
##                          D.terms.n.post.stem.log 
##                                    -1.348930e-01 
##                          D.terms.n.post.stop.log 
##                                    -7.030408e-01 
##                             cellular.fctrUnknown 
##                                    -3.520470e-01 
##                                    D.npnct14.log 
##                                    -1.324894e+00 
##                              D.terms.n.post.stop 
##                                    -1.318643e-01 
##                                    D.npnct05.log 
##                                    -3.001452e+00 
##           condition.fctrFor parts or not working 
##                                     3.717266e-01 
##           condition.fctrManufacturer refurbished 
##                                     6.531141e-01 
##                                condition.fctrNew 
##                                    -3.114269e-01 
##            condition.fctrNew other (see details) 
##                                     7.851594e-01 
##                 condition.fctrSeller refurbished 
##                                    -5.922203e-01 
##                                         idseq.my 
##                                    -8.380108e-05 
##                                  startprice.diff 
##                                    -1.378775e-02 
##     prdl.my.descr.fctrUnknown#1:.clusterid.fctr2 
##                                     1.219070e+00 
##      prdl.my.descr.fctriPad 1#1:.clusterid.fctr2 
##                                    -1.429967e-01 
##      prdl.my.descr.fctriPad 2#1:.clusterid.fctr2 
##                                    -1.283517e-01 
##     prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2 
##                                     1.085719e+00 
##     prdl.my.descr.fctriPadAir#1:.clusterid.fctr2 
##                                     3.584049e-01 
## prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2 
##                                     4.109503e-01 
##    prdl.my.descr.fctriPadmini#1:.clusterid.fctr2 
##                                     6.237503e-01 
##     prdl.my.descr.fctrUnknown#1:.clusterid.fctr3 
##                                     7.536223e-01 
##      prdl.my.descr.fctriPad 1#1:.clusterid.fctr3 
##                                     2.338335e-01 
##      prdl.my.descr.fctriPad 2#1:.clusterid.fctr3 
##                                    -4.454604e-01 
##     prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3 
##                                     3.337723e-01 
##     prdl.my.descr.fctriPadAir#1:.clusterid.fctr3 
##                                    -6.278645e-01 
## prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3 
##                                     2.062872e-01 
##    prdl.my.descr.fctriPadmini#1:.clusterid.fctr3 
##                                     5.957491e-01 
##     prdl.my.descr.fctrUnknown#1:.clusterid.fctr4 
##                                     6.921266e+00 
##      prdl.my.descr.fctriPad 1#1:.clusterid.fctr4 
##                                    -3.873423e-02 
##      prdl.my.descr.fctriPad 2#1:.clusterid.fctr4 
##                                     9.641785e-01 
##     prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4 
##                                    -5.028161e+00 
##     prdl.my.descr.fctriPadAir#1:.clusterid.fctr4 
##                                    -8.400228e-01 
## prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4 
##                                    -3.445745e+00 
##    prdl.my.descr.fctriPadmini#1:.clusterid.fctr4 
##                                     7.144367e+00 
##      prdl.my.descr.fctriPad 1#1:.clusterid.fctr5 
##                                     6.830081e+00 
##      prdl.my.descr.fctriPad 2#1:.clusterid.fctr5 
##                                    -1.525406e-01 
##     prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5 
##                                     2.753577e+00 
##     prdl.my.descr.fctriPadAir#1:.clusterid.fctr5 
##                                    -2.200136e+00 
##    prdl.my.descr.fctriPadmini#1:.clusterid.fctr5 
##                                     1.902956e+00 
##      prdl.my.descr.fctriPad 2#1:.clusterid.fctr6 
##                                    -1.588487e+00 
##     prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6 
##                                     7.311634e-01 
##    prdl.my.descr.fctriPadmini#1:.clusterid.fctr6 
##                                     9.893509e-01 
##     prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7 
##                                    -4.388981e+00 
##    prdl.my.descr.fctriPadmini#1:.clusterid.fctr7 
##                                    -6.657104e-02 
## character(0)
## character(0)
## [1] "    calling mypredict_mdl for fit:"

##    threshold   f.score
## 1        0.0 0.6313559
## 2        0.1 0.6412653
## 3        0.2 0.6851424
## 4        0.3 0.7174542
## 5        0.4 0.7514061
## 6        0.5 0.7771295
## 7        0.6 0.8029021
## 8        0.7 0.7994859
## 9        0.8 0.1825558
## 10       0.9 0.0000000
## 11       1.0 0.0000000

## [1] "Classifier Probability Threshold: 0.6000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.All.X.glmnet.N
## 1         N                              474
## 2         Y                              115
##   sold.fctr.predict.All.X.glmnet.Y
## 1                               48
## 2                              332
##          Prediction
## Reference   N   Y
##         N 474  48
##         Y 115 332
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.317853e-01   6.578590e-01   8.067182e-01   8.548166e-01   5.386997e-01 
## AccuracyPValue  McnemarPValue 
##   1.002400e-82   2.346973e-07 
## [1] "    calling mypredict_mdl for OOB:"

##    threshold   f.score
## 1        0.0 0.6339217
## 2        0.1 0.6418026
## 3        0.2 0.6821192
## 4        0.3 0.7079832
## 5        0.4 0.7440191
## 6        0.5 0.7627329
## 7        0.6 0.7932817
## 8        0.7 0.7956104
## 9        0.8 0.1596452
## 10       0.9 0.0000000
## 11       1.0 0.0000000

## [1] "Classifier Probability Threshold: 0.7000 to maximize f.score.OOB"
##   sold.fctr sold.fctr.predict.All.X.glmnet.N
## 1         N                              451
## 2         Y                              123
##   sold.fctr.predict.All.X.glmnet.Y
## 1                               26
## 2                              290
##          Prediction
## Reference   N   Y
##         N 451  26
##         Y 123 290
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.325843e-01   6.580401e-01   8.064031e-01   8.565410e-01   5.359551e-01 
## AccuracyPValue  McnemarPValue 
##   7.704832e-78   3.702005e-15 
##       model_id model_method
## 1 All.X.glmnet       glmnet
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                feats
## 1 biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
##   max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1               9                      6.992                 1.523
##   max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1   0.8677904                    0.6       0.8029021        0.8008256
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1             0.8067182             0.8548166       0.59697   0.8560007
##   opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1                    0.7       0.7956104        0.8325843
##   max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1             0.8064031              0.856541     0.6580401
##   max.AccuracySD.fit max.KappaSD.fit
## 1         0.03693608       0.0748364
##                 label step_major step_minor     bgn     end elapsed
## 4 fit.models_1_glmnet          4          0 108.718 119.617  10.899
## 5  fit.models_1_rpart          5          0 119.617      NA      NA
## [1] "fitting model: All.X.no.rnorm.rpart"
## [1] "    indep_vars: biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr"
## Aggregating results
## Selecting tuning parameters
## Fitting cp = 0.00671 on full training set
## Warning in myfit_mdl(model_id = model_id, model_method = method,
## indep_vars_vctr = indep_vars_vctr, : model's bestTune found at an extreme
## of tuneGrid for parameter: cp

## Call:
## rpart(formula = .outcome ~ ., control = list(minsplit = 20, minbucket = 7, 
##     cp = 0, maxcompete = 4, maxsurrogate = 5, usesurrogate = 2, 
##     surrogatestyle = 0, maxdepth = 30, xval = 0))
##   n= 969 
## 
##            CP nsplit rel error
## 1 0.527964206      0 1.0000000
## 2 0.134228188      1 0.4720358
## 3 0.006711409      2 0.3378076
## 
## Variable importance
##                               biddable 
##                                     51 
##                        startprice.diff 
##                                     28 
##                               idseq.my 
##                                      8 
##            prdl.my.descr.fctriPad 3+#0 
##                                      3 
##             prdl.my.descr.fctriPad 2#0 
##                                      3 
## condition.fctrFor parts or not working 
##                                      2 
##             prdl.my.descr.fctriPad 1#0 
##                                      2 
##                         color.fctrGold 
##                                      1 
## 
## Node number 1: 969 observations,    complexity param=0.5279642
##   predicted class=N  expected loss=0.4613003  P(node) =1
##     class counts:   522   447
##    probabilities: 0.539 0.461 
##   left son=2 (539 obs) right son=3 (430 obs)
##   Primary splits:
##       biddable                   < 0.5       to the left,  improve=151.58290, (0 missing)
##       startprice.diff            < 62.89456  to the right, improve= 82.96307, (0 missing)
##       idseq.my                   < 876.5     to the right, improve= 37.84375, (0 missing)
##       condition.fctrNew          < 0.5       to the right, improve= 16.22579, (0 missing)
##       prdl.my.descr.fctriPad 2#0 < 0.5       to the left,  improve= 13.28426, (0 missing)
##   Surrogate splits:
##       idseq.my                               < 798       to the right, agree=0.628, adj=0.163, (0 split)
##       prdl.my.descr.fctriPad 3+#0            < 0.5       to the left,  agree=0.586, adj=0.067, (0 split)
##       prdl.my.descr.fctriPad 2#0             < 0.5       to the left,  agree=0.579, adj=0.051, (0 split)
##       condition.fctrFor parts or not working < 0.5       to the left,  agree=0.578, adj=0.049, (0 split)
##       prdl.my.descr.fctriPad 1#0             < 0.5       to the left,  agree=0.573, adj=0.037, (0 split)
## 
## Node number 2: 539 observations
##   predicted class=N  expected loss=0.2115028  P(node) =0.5562436
##     class counts:   425   114
##    probabilities: 0.788 0.212 
## 
## Node number 3: 430 observations,    complexity param=0.1342282
##   predicted class=Y  expected loss=0.2255814  P(node) =0.4437564
##     class counts:    97   333
##    probabilities: 0.226 0.774 
##   left son=6 (80 obs) right son=7 (350 obs)
##   Primary splits:
##       startprice.diff            < 63.51092  to the right, improve=82.902920, (0 missing)
##       idseq.my                   < 893.5     to the right, improve=15.999440, (0 missing)
##       cellular.fctrUnknown       < 0.5       to the right, improve= 3.057989, (0 missing)
##       prdl.my.descr.fctriPad 2#0 < 0.5       to the left,  improve= 2.726027, (0 missing)
##       condition.fctrNew          < 0.5       to the right, improve= 2.683363, (0 missing)
##   Surrogate splits:
##       color.fctrGold          < 0.5       to the right, agree=0.819, adj=0.025, (0 split)
##       D.ratio.nstopwrds.nwrds < 0.1380952 to the left,  agree=0.816, adj=0.013, (0 split)
##       D.nwrds.unq.log         < 2.602003  to the right, agree=0.816, adj=0.013, (0 split)
##       D.terms.n.post.stem.log < 2.602003  to the right, agree=0.816, adj=0.013, (0 split)
##       D.terms.n.post.stop.log < 2.602003  to the right, agree=0.816, adj=0.013, (0 split)
## 
## Node number 6: 80 observations
##   predicted class=N  expected loss=0.125  P(node) =0.08255934
##     class counts:    70    10
##    probabilities: 0.875 0.125 
## 
## Node number 7: 350 observations
##   predicted class=Y  expected loss=0.07714286  P(node) =0.3611971
##     class counts:    27   323
##    probabilities: 0.077 0.923 
## 
## n= 969 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 969 447 N (0.53869969 0.46130031)  
##   2) biddable< 0.5 539 114 N (0.78849722 0.21150278) *
##   3) biddable>=0.5 430  97 Y (0.22558140 0.77441860)  
##     6) startprice.diff>=63.51092 80  10 N (0.87500000 0.12500000) *
##     7) startprice.diff< 63.51092 350  27 Y (0.07714286 0.92285714) *
## [1] "    calling mypredict_mdl for fit:"

##    threshold   f.score
## 1        0.0 0.6313559
## 2        0.1 0.6313559
## 3        0.2 0.6541916
## 4        0.3 0.8105395
## 5        0.4 0.8105395
## 6        0.5 0.8105395
## 7        0.6 0.8105395
## 8        0.7 0.8105395
## 9        0.8 0.8105395
## 10       0.9 0.8105395
## 11       1.0 0.0000000

## [1] "Classifier Probability Threshold: 0.9000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.All.X.no.rnorm.rpart.N
## 1         N                                      495
## 2         Y                                      124
##   sold.fctr.predict.All.X.no.rnorm.rpart.Y
## 1                                       27
## 2                                      323
##          Prediction
## Reference   N   Y
##         N 495  27
##         Y 124 323
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.441692e-01   6.814949e-01   8.197763e-01   8.664485e-01   5.386997e-01 
## AccuracyPValue  McnemarPValue 
##   1.762753e-90   5.612287e-15 
## [1] "    calling mypredict_mdl for OOB:"

##    threshold   f.score
## 1        0.0 0.6339217
## 2        0.1 0.6339217
## 3        0.2 0.6633907
## 4        0.3 0.8102981
## 5        0.4 0.8102981
## 6        0.5 0.8102981
## 7        0.6 0.8102981
## 8        0.7 0.8102981
## 9        0.8 0.8102981
## 10       0.9 0.8102981
## 11       1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.9000 to maximize f.score.OOB"
##   sold.fctr sold.fctr.predict.All.X.no.rnorm.rpart.N
## 1         N                                      451
## 2         Y                                      114
##   sold.fctr.predict.All.X.no.rnorm.rpart.Y
## 1                                       26
## 2                                      299
##          Prediction
## Reference   N   Y
##         N 451  26
##         Y 114 299
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.426966e-01   6.791719e-01   8.170871e-01   8.660125e-01   5.359551e-01 
## AccuracyPValue  McnemarPValue 
##   1.090657e-83   1.940362e-13 
##               model_id model_method
## 1 All.X.no.rnorm.rpart        rpart
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        feats
## 1 biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
##   max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1               3                      1.834                 0.096
##   max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1   0.8434283                    0.9       0.8105395        0.8338493
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1             0.8197763             0.8664485     0.6645079   0.8469855
##   opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1                    0.9       0.8102981        0.8426966
##   max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1             0.8170871             0.8660125     0.6791719
##   max.AccuracySD.fit max.KappaSD.fit
## 1        0.008937311      0.01629107
##                label step_major step_minor     bgn     end elapsed
## 5 fit.models_1_rpart          5          0 119.617 125.076   5.459
## 6    fit.models_1_rf          6          0 125.076      NA      NA
## [1] "fitting model: All.X.no.rnorm.rf"
## [1] "    indep_vars: biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr"
## Loading required package: randomForest
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
## 
## The following object is masked from 'package:gdata':
## 
##     combine

## Aggregating results
## Selecting tuning parameters
## Fitting mtry = 77 on full training set

##                 Length Class      Mode     
## call               4   -none-     call     
## type               1   -none-     character
## predicted        969   factor     numeric  
## err.rate        1500   -none-     numeric  
## confusion          6   -none-     numeric  
## votes           1938   matrix     numeric  
## oob.times        969   -none-     numeric  
## classes            2   -none-     character
## importance       153   -none-     numeric  
## importanceSD       0   -none-     NULL     
## localImportance    0   -none-     NULL     
## proximity          0   -none-     NULL     
## ntree              1   -none-     numeric  
## mtry               1   -none-     numeric  
## forest            14   -none-     list     
## y                969   factor     numeric  
## test               0   -none-     NULL     
## inbag              0   -none-     NULL     
## xNames           153   -none-     character
## problemType        1   -none-     character
## tuneValue          1   data.frame list     
## obsLevels          2   -none-     character
## [1] "    calling mypredict_mdl for fit:"

##    threshold   f.score
## 1        0.0 0.6313559
## 2        0.1 0.8579655
## 3        0.2 0.9520767
## 4        0.3 0.9781182
## 5        0.4 0.9988827
## 6        0.5 1.0000000
## 7        0.6 1.0000000
## 8        0.7 0.9652778
## 9        0.8 0.8935644
## 10       0.9 0.8111702
## 11       1.0 0.2807692

## [1] "Classifier Probability Threshold: 0.6000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.All.X.no.rnorm.rf.N
## 1         N                                   522
## 2         Y                                    NA
##   sold.fctr.predict.All.X.no.rnorm.rf.Y
## 1                                    NA
## 2                                   447
##          Prediction
## Reference   N   Y
##         N 522   0
##         Y   0 447
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   1.000000e+00   1.000000e+00   9.962003e-01   1.000000e+00   5.386997e-01 
## AccuracyPValue  McnemarPValue 
##  4.731267e-261            NaN 
## [1] "    calling mypredict_mdl for OOB:"

##    threshold   f.score
## 1        0.0 0.6339217
## 2        0.1 0.7527675
## 3        0.2 0.7930328
## 4        0.3 0.8031146
## 5        0.4 0.8169014
## 6        0.5 0.8294479
## 7        0.6 0.8209719
## 8        0.7 0.8156124
## 9        0.8 0.7685714
## 10       0.9 0.6645669
## 11       1.0 0.1562500
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.OOB"
##   sold.fctr sold.fctr.predict.All.X.no.rnorm.rf.N
## 1         N                                   413
## 2         Y                                    75
##   sold.fctr.predict.All.X.no.rnorm.rf.Y
## 1                                    64
## 2                                   338
##          Prediction
## Reference   N   Y
##         N 413  64
##         Y  75 338
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.438202e-01   6.854548e-01   8.182763e-01   8.670627e-01   5.359551e-01 
## AccuracyPValue  McnemarPValue 
##   2.342835e-84   3.963328e-01 
##            model_id model_method
## 1 All.X.no.rnorm.rf           rf
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        feats
## 1 biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
##   max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1               3                     19.691                 7.227
##   max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1           1                    0.6               1        0.8482972
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1             0.9962003                     1     0.6925622   0.9180131
##   opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1                    0.5       0.8294479        0.8438202
##   max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1             0.8182763             0.8670627     0.6854548
##   max.AccuracySD.fit max.KappaSD.fit
## 1        0.008191181      0.01601525
##              label step_major step_minor     bgn     end elapsed
## 6  fit.models_1_rf          6          0 125.076 148.261  23.185
## 7 fit.models_1_glm          7          0 148.261      NA      NA
## [1] "fitting model: All.Interact.X.glm"
## [1] "    indep_vars: D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr"
## Aggregating results
## Fitting final model on full training set
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: not plotting observations with leverage one:
##   306, 346, 354, 619, 643, 935, 939

## Warning: not plotting observations with leverage one:
##   306, 346, 354, 619, 643, 935, 939

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
##  -8.49    0.00    0.00    0.00    8.49  
## 
## Coefficients: (115 not defined because of singularities)
##                                                                 Estimate
## (Intercept)                                                   -1.949e+18
## D.ratio.nstopwrds.nwrds                                       -7.914e+15
## D.terms.n.stem.stop.Ratio                                      1.946e+18
## D.npnct01.log                                                 -7.371e+13
## .rnorm                                                        -2.269e+13
## storage.fctr16                                                -1.048e+14
## storage.fctr32                                                -5.766e+13
## storage.fctr64                                                 2.492e+14
## storage.fctrUnknown                                           -1.001e+14
## D.npnct11.log                                                  2.205e+14
## D.npnct10.log                                                 -1.547e+15
## D.TfIdf.sum.post.stop                                          1.832e+15
## D.TfIdf.sum.post.stem                                         -1.463e+15
## D.sum.TfIdf                                                           NA
## `prdl.my.descr.fctrUnknown#1`                                 -2.216e+16
## `prdl.my.descr.fctriPad 1#0`                                  -3.201e+14
## `prdl.my.descr.fctriPad 1#1`                                   6.959e+15
## `prdl.my.descr.fctriPad 2#0`                                   9.460e+14
## `prdl.my.descr.fctriPad 2#1`                                   1.726e+16
## `prdl.my.descr.fctriPad 3+#0`                                  5.188e+14
## `prdl.my.descr.fctriPad 3+#1`                                  8.052e+14
## `prdl.my.descr.fctriPadAir#0`                                 -7.874e+13
## `prdl.my.descr.fctriPadAir#1`                                  9.935e+14
## `prdl.my.descr.fctriPadmini 2+#0`                             -3.994e+14
## `prdl.my.descr.fctriPadmini 2+#1`                              1.116e+16
## `prdl.my.descr.fctriPadmini#0`                                 5.299e+14
## `prdl.my.descr.fctriPadmini#1`                                -2.064e+15
## D.npnct13.log                                                 -2.577e+14
## color.fctrGold                                                -2.592e+14
## `color.fctrSpace Gray`                                        -5.731e+14
## color.fctrUnknown                                             -3.102e+14
## color.fctrWhite                                               -2.727e+14
## D.npnct08.log                                                 -4.271e+14
## D.npnct16.log                                                  5.412e+14
## D.npnct24.log                                                         NA
## D.nstopwrds.log                                                1.777e+15
## D.npnct06.log                                                 -1.699e+15
## D.npnct28.log                                                  1.187e+15
## D.nuppr.log                                                    1.350e+15
## D.npnct12.log                                                 -4.147e+14
## D.npnct09.log                                                 -1.996e+15
## D.ndgts.log                                                    2.058e+14
## D.nwrds.unq.log                                               -2.159e+18
## D.terms.n.post.stem.log                                               NA
## D.terms.n.post.stop.log                                        2.154e+18
## D.npnct14.log                                                 -9.199e+13
## D.terms.n.post.stem                                            1.115e+16
## D.terms.n.post.stop                                           -1.089e+16
## D.npnct05.log                                                 -1.707e+15
## `condition.fctrFor parts or not working`                      -7.520e+13
## `condition.fctrManufacturer refurbished`                       8.492e+13
## condition.fctrNew                                             -1.198e+14
## `condition.fctrNew other (see details)`                        3.110e+14
## `condition.fctrSeller refurbished`                            -3.901e+14
## idseq.my                                                      -5.154e+11
## D.ratio.sum.TfIdf.nwrds                                        1.085e+15
## D.TfIdf.sum.stem.stop.Ratio                                    1.204e+16
## D.npnct15.log                                                 -8.930e+14
## D.npnct03.log                                                  6.038e+13
## D.nwrds.log                                                    1.008e+15
## D.nchrs.log                                                   -1.931e+15
## startprice.diff                                               -4.156e+12
## biddable                                                       1.107e+15
## cellular.fctr1                                                -1.068e+14
## cellular.fctrUnknown                                          -3.213e+13
## carrier.fctrNone                                                      NA
## carrier.fctrOther                                              6.655e+15
## carrier.fctrSprint                                             3.488e+14
## `carrier.fctrT-Mobile`                                        -7.666e+13
## carrier.fctrUnknown                                           -1.190e+14
## carrier.fctrVerizon                                            3.237e+14
## `prdl.my.descr.fctrUnknown#1:idseq.my`                         2.317e+12
## `prdl.my.descr.fctriPad 1#0:idseq.my`                          5.716e+11
## `prdl.my.descr.fctriPad 1#1:idseq.my`                          8.043e+11
## `prdl.my.descr.fctriPad 2#0:idseq.my`                          1.894e+11
## `prdl.my.descr.fctriPad 2#1:idseq.my`                          1.308e+11
## `prdl.my.descr.fctriPad 3+#0:idseq.my`                         3.178e+11
## `prdl.my.descr.fctriPad 3+#1:idseq.my`                         5.863e+11
## `prdl.my.descr.fctriPadAir#0:idseq.my`                         5.262e+11
## `prdl.my.descr.fctriPadAir#1:idseq.my`                         9.690e+11
## `prdl.my.descr.fctriPadmini 2+#0:idseq.my`                     3.645e+11
## `prdl.my.descr.fctriPadmini 2+#1:idseq.my`                     1.815e+11
## `prdl.my.descr.fctriPadmini#0:idseq.my`                        2.030e+11
## `prdl.my.descr.fctriPadmini#1:idseq.my`                        2.471e+11
## `prdl.my.descr.fctrUnknown#1:D.ratio.sum.TfIdf.nwrds`         -1.543e+15
## `prdl.my.descr.fctriPad 1#0:D.ratio.sum.TfIdf.nwrds`                  NA
## `prdl.my.descr.fctriPad 1#1:D.ratio.sum.TfIdf.nwrds`          -7.686e+14
## `prdl.my.descr.fctriPad 2#0:D.ratio.sum.TfIdf.nwrds`                  NA
## `prdl.my.descr.fctriPad 2#1:D.ratio.sum.TfIdf.nwrds`          -3.397e+15
## `prdl.my.descr.fctriPad 3+#0:D.ratio.sum.TfIdf.nwrds`                 NA
## `prdl.my.descr.fctriPad 3+#1:D.ratio.sum.TfIdf.nwrds`         -3.023e+15
## `prdl.my.descr.fctriPadAir#0:D.ratio.sum.TfIdf.nwrds`                 NA
## `prdl.my.descr.fctriPadAir#1:D.ratio.sum.TfIdf.nwrds`         -2.267e+15
## `prdl.my.descr.fctriPadmini 2+#0:D.ratio.sum.TfIdf.nwrds`             NA
## `prdl.my.descr.fctriPadmini 2+#1:D.ratio.sum.TfIdf.nwrds`     -2.807e+15
## `prdl.my.descr.fctriPadmini#0:D.ratio.sum.TfIdf.nwrds`                NA
## `prdl.my.descr.fctriPadmini#1:D.ratio.sum.TfIdf.nwrds`                NA
## `prdl.my.descr.fctrUnknown#1:D.TfIdf.sum.stem.stop.Ratio`      2.284e+16
## `prdl.my.descr.fctriPad 1#0:D.TfIdf.sum.stem.stop.Ratio`              NA
## `prdl.my.descr.fctriPad 1#1:D.TfIdf.sum.stem.stop.Ratio`      -1.704e+15
## `prdl.my.descr.fctriPad 2#0:D.TfIdf.sum.stem.stop.Ratio`              NA
## `prdl.my.descr.fctriPad 2#1:D.TfIdf.sum.stem.stop.Ratio`      -1.245e+16
## `prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio`             NA
## `prdl.my.descr.fctriPad 3+#1:D.TfIdf.sum.stem.stop.Ratio`     -8.177e+14
## `prdl.my.descr.fctriPadAir#0:D.TfIdf.sum.stem.stop.Ratio`             NA
## `prdl.my.descr.fctriPadAir#1:D.TfIdf.sum.stem.stop.Ratio`      1.134e+14
## `prdl.my.descr.fctriPadmini 2+#0:D.TfIdf.sum.stem.stop.Ratio`         NA
## `prdl.my.descr.fctriPadmini 2+#1:D.TfIdf.sum.stem.stop.Ratio`  2.137e+16
## `prdl.my.descr.fctriPadmini#0:D.TfIdf.sum.stem.stop.Ratio`            NA
## `prdl.my.descr.fctriPadmini#1:D.TfIdf.sum.stem.stop.Ratio`            NA
## `prdl.my.descr.fctrUnknown#1:D.npnct15.log`                           NA
## `prdl.my.descr.fctriPad 1#0:D.npnct15.log`                            NA
## `prdl.my.descr.fctriPad 1#1:D.npnct15.log`                     1.789e+15
## `prdl.my.descr.fctriPad 2#0:D.npnct15.log`                            NA
## `prdl.my.descr.fctriPad 2#1:D.npnct15.log`                     9.363e+14
## `prdl.my.descr.fctriPad 3+#0:D.npnct15.log`                           NA
## `prdl.my.descr.fctriPad 3+#1:D.npnct15.log`                    1.234e+15
## `prdl.my.descr.fctriPadAir#0:D.npnct15.log`                           NA
## `prdl.my.descr.fctriPadAir#1:D.npnct15.log`                           NA
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct15.log`                       NA
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct15.log`                       NA
## `prdl.my.descr.fctriPadmini#0:D.npnct15.log`                          NA
## `prdl.my.descr.fctriPadmini#1:D.npnct15.log`                          NA
## `prdl.my.descr.fctrUnknown#1:D.npnct03.log`                           NA
## `prdl.my.descr.fctriPad 1#0:D.npnct03.log`                            NA
## `prdl.my.descr.fctriPad 1#1:D.npnct03.log`                    -1.227e+15
## `prdl.my.descr.fctriPad 2#0:D.npnct03.log`                            NA
## `prdl.my.descr.fctriPad 2#1:D.npnct03.log`                    -1.640e+15
## `prdl.my.descr.fctriPad 3+#0:D.npnct03.log`                           NA
## `prdl.my.descr.fctriPad 3+#1:D.npnct03.log`                   -1.703e+15
## `prdl.my.descr.fctriPadAir#0:D.npnct03.log`                           NA
## `prdl.my.descr.fctriPadAir#1:D.npnct03.log`                    1.582e+15
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct03.log`                       NA
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct03.log`                1.310e+16
## `prdl.my.descr.fctriPadmini#0:D.npnct03.log`                          NA
## `prdl.my.descr.fctriPadmini#1:D.npnct03.log`                          NA
## `prdl.my.descr.fctrUnknown#1:D.nwrds.log`                      2.585e+15
## `prdl.my.descr.fctriPad 1#0:D.nwrds.log`                              NA
## `prdl.my.descr.fctriPad 1#1:D.nwrds.log`                       3.350e+15
## `prdl.my.descr.fctriPad 2#0:D.nwrds.log`                              NA
## `prdl.my.descr.fctriPad 2#1:D.nwrds.log`                      -1.273e+14
## `prdl.my.descr.fctriPad 3+#0:D.nwrds.log`                             NA
## `prdl.my.descr.fctriPad 3+#1:D.nwrds.log`                     -3.288e+15
## `prdl.my.descr.fctriPadAir#0:D.nwrds.log`                             NA
## `prdl.my.descr.fctriPadAir#1:D.nwrds.log`                     -1.576e+15
## `prdl.my.descr.fctriPadmini 2+#0:D.nwrds.log`                         NA
## `prdl.my.descr.fctriPadmini 2+#1:D.nwrds.log`                  2.244e+16
## `prdl.my.descr.fctriPadmini#0:D.nwrds.log`                            NA
## `prdl.my.descr.fctriPadmini#1:D.nwrds.log`                            NA
## `prdl.my.descr.fctrUnknown#1:D.nchrs.log`                     -2.280e+15
## `prdl.my.descr.fctriPad 1#0:D.nchrs.log`                              NA
## `prdl.my.descr.fctriPad 1#1:D.nchrs.log`                      -3.416e+15
## `prdl.my.descr.fctriPad 2#0:D.nchrs.log`                              NA
## `prdl.my.descr.fctriPad 2#1:D.nchrs.log`                      -1.154e+15
## `prdl.my.descr.fctriPad 3+#0:D.nchrs.log`                             NA
## `prdl.my.descr.fctriPad 3+#1:D.nchrs.log`                      1.916e+15
## `prdl.my.descr.fctriPadAir#0:D.nchrs.log`                             NA
## `prdl.my.descr.fctriPadAir#1:D.nchrs.log`                      5.694e+14
## `prdl.my.descr.fctriPadmini 2+#0:D.nchrs.log`                         NA
## `prdl.my.descr.fctriPadmini 2+#1:D.nchrs.log`                 -2.087e+16
## `prdl.my.descr.fctriPadmini#0:D.nchrs.log`                            NA
## `prdl.my.descr.fctriPadmini#1:D.nchrs.log`                            NA
## `startprice.diff:biddable`                                    -1.179e+13
## `cellular.fctr1:carrier.fctrNone`                                     NA
## `cellular.fctrUnknown:carrier.fctrNone`                               NA
## `cellular.fctr1:carrier.fctrOther`                                    NA
## `cellular.fctrUnknown:carrier.fctrOther`                              NA
## `cellular.fctr1:carrier.fctrSprint`                                   NA
## `cellular.fctrUnknown:carrier.fctrSprint`                             NA
## `cellular.fctr1:carrier.fctrT-Mobile`                                 NA
## `cellular.fctrUnknown:carrier.fctrT-Mobile`                           NA
## `cellular.fctr1:carrier.fctrUnknown`                                  NA
## `cellular.fctrUnknown:carrier.fctrUnknown`                            NA
## `cellular.fctr1:carrier.fctrVerizon`                                  NA
## `cellular.fctrUnknown:carrier.fctrVerizon`                            NA
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2`                        NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2`                 9.529e+13
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2`                         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2`                 -6.143e+14
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2`                         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2`                  8.278e+14
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2`                        NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2`                 2.097e+13
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2`                        NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2`                -2.463e+14
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2`                    NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2`             1.036e+15
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2`                       NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2`                5.563e+14
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3`                        NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3`                -1.431e+14
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3`                         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3`                 -4.879e+13
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3`                         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3`                 -1.209e+14
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3`                        NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3`                 2.561e+14
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3`                        NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3`                -6.053e+14
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3`                    NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3`            -2.486e+15
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3`                       NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3`                1.338e+15
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4`                        NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4`                 2.887e+15
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4`                         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4`                  1.413e+14
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4`                         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4`                  1.074e+15
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4`                        NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4`                -1.298e+15
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4`                        NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4`                -9.170e+14
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4`                    NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4`            -4.855e+15
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4`                       NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4`                2.876e+15
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5`                        NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5`                        NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5`                         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5`                  3.111e+15
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5`                         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5`                  2.088e+15
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5`                        NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5`                 1.528e+15
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5`                        NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5`                -2.634e+15
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5`                    NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5`                    NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5`                       NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5`                1.399e+15
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6`                        NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6`                        NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6`                         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6`                         NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6`                         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6`                 -1.282e+15
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6`                        NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6`                -1.249e+14
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6`                        NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6`                        NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6`                    NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6`                    NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6`                       NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6`                1.627e+15
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7`                        NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7`                        NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7`                         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7`                         NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7`                         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7`                         NA
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7`                        NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7`                -1.306e+14
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7`                        NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7`                        NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7`                    NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7`                    NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7`                       NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7`                5.512e+14
##                                                               Std. Error
## (Intercept)                                                    8.552e+10
## D.ratio.nstopwrds.nwrds                                        1.734e+08
## D.terms.n.stem.stop.Ratio                                      8.548e+10
## D.npnct01.log                                                  1.717e+07
## .rnorm                                                         2.353e+06
## storage.fctr16                                                 1.169e+07
## storage.fctr32                                                 1.253e+07
## storage.fctr64                                                 1.220e+07
## storage.fctrUnknown                                            1.658e+07
## D.npnct11.log                                                  1.045e+07
## D.npnct10.log                                                  6.406e+07
## D.TfIdf.sum.post.stop                                          9.198e+07
## D.TfIdf.sum.post.stem                                          9.570e+07
## D.sum.TfIdf                                                           NA
## `prdl.my.descr.fctrUnknown#1`                                  9.758e+08
## `prdl.my.descr.fctriPad 1#0`                                   2.668e+07
## `prdl.my.descr.fctriPad 1#1`                                   4.883e+08
## `prdl.my.descr.fctriPad 2#0`                                   2.945e+07
## `prdl.my.descr.fctriPad 2#1`                                   5.159e+08
## `prdl.my.descr.fctriPad 3+#0`                                  2.679e+07
## `prdl.my.descr.fctriPad 3+#1`                                  4.493e+08
## `prdl.my.descr.fctriPadAir#0`                                  2.719e+07
## `prdl.my.descr.fctriPadAir#1`                                  4.723e+08
## `prdl.my.descr.fctriPadmini 2+#0`                              2.893e+07
## `prdl.my.descr.fctriPadmini 2+#1`                              8.885e+08
## `prdl.my.descr.fctriPadmini#0`                                 2.750e+07
## `prdl.my.descr.fctriPadmini#1`                                 2.667e+08
## D.npnct13.log                                                  1.160e+07
## color.fctrGold                                                 1.311e+07
## `color.fctrSpace Gray`                                         9.452e+06
## color.fctrUnknown                                              6.921e+06
## color.fctrWhite                                                7.278e+06
## D.npnct08.log                                                  2.038e+07
## D.npnct16.log                                                  6.772e+07
## D.npnct24.log                                                         NA
## D.nstopwrds.log                                                4.786e+07
## D.npnct06.log                                                  7.054e+07
## D.npnct28.log                                                  7.155e+07
## D.nuppr.log                                                    1.442e+08
## D.npnct12.log                                                  2.026e+07
## D.npnct09.log                                                  5.427e+07
## D.ndgts.log                                                    1.452e+07
## D.nwrds.unq.log                                                9.505e+10
## D.terms.n.post.stem.log                                               NA
## D.terms.n.post.stop.log                                        9.503e+10
## D.npnct14.log                                                  2.119e+07
## D.terms.n.post.stem                                            5.040e+08
## D.terms.n.post.stop                                            5.020e+08
## D.npnct05.log                                                  3.595e+07
## `condition.fctrFor parts or not working`                       1.031e+07
## `condition.fctrManufacturer refurbished`                       1.434e+07
## condition.fctrNew                                              7.893e+06
## `condition.fctrNew other (see details)`                        1.154e+07
## `condition.fctrSeller refurbished`                             1.141e+07
## idseq.my                                                       1.751e+04
## D.ratio.sum.TfIdf.nwrds                                        5.714e+07
## D.TfIdf.sum.stem.stop.Ratio                                    6.741e+08
## D.npnct15.log                                                  1.034e+08
## D.npnct03.log                                                  5.880e+07
## D.nwrds.log                                                    1.172e+08
## D.nchrs.log                                                    1.876e+08
## startprice.diff                                                3.333e+04
## biddable                                                       5.549e+06
## cellular.fctr1                                                 7.773e+06
## cellular.fctrUnknown                                           1.675e+07
## carrier.fctrNone                                                      NA
## carrier.fctrOther                                              1.238e+08
## carrier.fctrSprint                                             1.891e+07
## `carrier.fctrT-Mobile`                                         2.505e+07
## carrier.fctrUnknown                                            1.193e+07
## carrier.fctrVerizon                                            1.112e+07
## `prdl.my.descr.fctrUnknown#1:idseq.my`                         4.053e+04
## `prdl.my.descr.fctriPad 1#0:idseq.my`                          2.345e+04
## `prdl.my.descr.fctriPad 1#1:idseq.my`                          2.560e+04
## `prdl.my.descr.fctriPad 2#0:idseq.my`                          2.670e+04
## `prdl.my.descr.fctriPad 2#1:idseq.my`                          2.401e+04
## `prdl.my.descr.fctriPad 3+#0:idseq.my`                         2.235e+04
## `prdl.my.descr.fctriPad 3+#1:idseq.my`                         2.304e+04
## `prdl.my.descr.fctriPadAir#0:idseq.my`                         2.178e+04
## `prdl.my.descr.fctriPadAir#1:idseq.my`                         2.359e+04
## `prdl.my.descr.fctriPadmini 2+#0:idseq.my`                     2.319e+04
## `prdl.my.descr.fctriPadmini 2+#1:idseq.my`                     5.038e+04
## `prdl.my.descr.fctriPadmini#0:idseq.my`                        2.152e+04
## `prdl.my.descr.fctriPadmini#1:idseq.my`                        2.662e+04
## `prdl.my.descr.fctrUnknown#1:D.ratio.sum.TfIdf.nwrds`          5.944e+07
## `prdl.my.descr.fctriPad 1#0:D.ratio.sum.TfIdf.nwrds`                  NA
## `prdl.my.descr.fctriPad 1#1:D.ratio.sum.TfIdf.nwrds`           6.659e+07
## `prdl.my.descr.fctriPad 2#0:D.ratio.sum.TfIdf.nwrds`                  NA
## `prdl.my.descr.fctriPad 2#1:D.ratio.sum.TfIdf.nwrds`           7.203e+07
## `prdl.my.descr.fctriPad 3+#0:D.ratio.sum.TfIdf.nwrds`                 NA
## `prdl.my.descr.fctriPad 3+#1:D.ratio.sum.TfIdf.nwrds`          5.964e+07
## `prdl.my.descr.fctriPadAir#0:D.ratio.sum.TfIdf.nwrds`                 NA
## `prdl.my.descr.fctriPadAir#1:D.ratio.sum.TfIdf.nwrds`          7.046e+07
## `prdl.my.descr.fctriPadmini 2+#0:D.ratio.sum.TfIdf.nwrds`             NA
## `prdl.my.descr.fctriPadmini 2+#1:D.ratio.sum.TfIdf.nwrds`      3.132e+08
## `prdl.my.descr.fctriPadmini#0:D.ratio.sum.TfIdf.nwrds`                NA
## `prdl.my.descr.fctriPadmini#1:D.ratio.sum.TfIdf.nwrds`                NA
## `prdl.my.descr.fctrUnknown#1:D.TfIdf.sum.stem.stop.Ratio`      8.688e+08
## `prdl.my.descr.fctriPad 1#0:D.TfIdf.sum.stem.stop.Ratio`              NA
## `prdl.my.descr.fctriPad 1#1:D.TfIdf.sum.stem.stop.Ratio`       3.992e+08
## `prdl.my.descr.fctriPad 2#0:D.TfIdf.sum.stem.stop.Ratio`              NA
## `prdl.my.descr.fctriPad 2#1:D.TfIdf.sum.stem.stop.Ratio`       4.373e+08
## `prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio`             NA
## `prdl.my.descr.fctriPad 3+#1:D.TfIdf.sum.stem.stop.Ratio`      3.828e+08
## `prdl.my.descr.fctriPadAir#0:D.TfIdf.sum.stem.stop.Ratio`             NA
## `prdl.my.descr.fctriPadAir#1:D.TfIdf.sum.stem.stop.Ratio`      4.121e+08
## `prdl.my.descr.fctriPadmini 2+#0:D.TfIdf.sum.stem.stop.Ratio`         NA
## `prdl.my.descr.fctriPadmini 2+#1:D.TfIdf.sum.stem.stop.Ratio`  5.166e+08
## `prdl.my.descr.fctriPadmini#0:D.TfIdf.sum.stem.stop.Ratio`            NA
## `prdl.my.descr.fctriPadmini#1:D.TfIdf.sum.stem.stop.Ratio`            NA
## `prdl.my.descr.fctrUnknown#1:D.npnct15.log`                           NA
## `prdl.my.descr.fctriPad 1#0:D.npnct15.log`                            NA
## `prdl.my.descr.fctriPad 1#1:D.npnct15.log`                     1.125e+08
## `prdl.my.descr.fctriPad 2#0:D.npnct15.log`                            NA
## `prdl.my.descr.fctriPad 2#1:D.npnct15.log`                     1.525e+08
## `prdl.my.descr.fctriPad 3+#0:D.npnct15.log`                           NA
## `prdl.my.descr.fctriPad 3+#1:D.npnct15.log`                    1.099e+08
## `prdl.my.descr.fctriPadAir#0:D.npnct15.log`                           NA
## `prdl.my.descr.fctriPadAir#1:D.npnct15.log`                           NA
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct15.log`                       NA
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct15.log`                       NA
## `prdl.my.descr.fctriPadmini#0:D.npnct15.log`                          NA
## `prdl.my.descr.fctriPadmini#1:D.npnct15.log`                          NA
## `prdl.my.descr.fctrUnknown#1:D.npnct03.log`                           NA
## `prdl.my.descr.fctriPad 1#0:D.npnct03.log`                            NA
## `prdl.my.descr.fctriPad 1#1:D.npnct03.log`                     6.480e+07
## `prdl.my.descr.fctriPad 2#0:D.npnct03.log`                            NA
## `prdl.my.descr.fctriPad 2#1:D.npnct03.log`                     7.651e+07
## `prdl.my.descr.fctriPad 3+#0:D.npnct03.log`                           NA
## `prdl.my.descr.fctriPad 3+#1:D.npnct03.log`                    8.984e+07
## `prdl.my.descr.fctriPadAir#0:D.npnct03.log`                           NA
## `prdl.my.descr.fctriPadAir#1:D.npnct03.log`                    8.144e+07
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct03.log`                       NA
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct03.log`                2.057e+08
## `prdl.my.descr.fctriPadmini#0:D.npnct03.log`                          NA
## `prdl.my.descr.fctriPadmini#1:D.npnct03.log`                          NA
## `prdl.my.descr.fctrUnknown#1:D.nwrds.log`                      1.682e+08
## `prdl.my.descr.fctriPad 1#0:D.nwrds.log`                              NA
## `prdl.my.descr.fctriPad 1#1:D.nwrds.log`                       1.344e+08
## `prdl.my.descr.fctriPad 2#0:D.nwrds.log`                              NA
## `prdl.my.descr.fctriPad 2#1:D.nwrds.log`                       1.177e+08
## `prdl.my.descr.fctriPad 3+#0:D.nwrds.log`                             NA
## `prdl.my.descr.fctriPad 3+#1:D.nwrds.log`                      1.199e+08
## `prdl.my.descr.fctriPadAir#0:D.nwrds.log`                             NA
## `prdl.my.descr.fctriPadAir#1:D.nwrds.log`                      1.097e+08
## `prdl.my.descr.fctriPadmini 2+#0:D.nwrds.log`                         NA
## `prdl.my.descr.fctriPadmini 2+#1:D.nwrds.log`                  3.913e+08
## `prdl.my.descr.fctriPadmini#0:D.nwrds.log`                            NA
## `prdl.my.descr.fctriPadmini#1:D.nwrds.log`                            NA
## `prdl.my.descr.fctrUnknown#1:D.nchrs.log`                      1.499e+08
## `prdl.my.descr.fctriPad 1#0:D.nchrs.log`                              NA
## `prdl.my.descr.fctriPad 1#1:D.nchrs.log`                       1.314e+08
## `prdl.my.descr.fctriPad 2#0:D.nchrs.log`                              NA
## `prdl.my.descr.fctriPad 2#1:D.nchrs.log`                       1.194e+08
## `prdl.my.descr.fctriPad 3+#0:D.nchrs.log`                             NA
## `prdl.my.descr.fctriPad 3+#1:D.nchrs.log`                      1.138e+08
## `prdl.my.descr.fctriPadAir#0:D.nchrs.log`                             NA
## `prdl.my.descr.fctriPadAir#1:D.nchrs.log`                      1.037e+08
## `prdl.my.descr.fctriPadmini 2+#0:D.nchrs.log`                         NA
## `prdl.my.descr.fctriPadmini 2+#1:D.nchrs.log`                  2.856e+08
## `prdl.my.descr.fctriPadmini#0:D.nchrs.log`                            NA
## `prdl.my.descr.fctriPadmini#1:D.nchrs.log`                            NA
## `startprice.diff:biddable`                                     4.794e+04
## `cellular.fctr1:carrier.fctrNone`                                     NA
## `cellular.fctrUnknown:carrier.fctrNone`                               NA
## `cellular.fctr1:carrier.fctrOther`                                    NA
## `cellular.fctrUnknown:carrier.fctrOther`                              NA
## `cellular.fctr1:carrier.fctrSprint`                                   NA
## `cellular.fctrUnknown:carrier.fctrSprint`                             NA
## `cellular.fctr1:carrier.fctrT-Mobile`                                 NA
## `cellular.fctrUnknown:carrier.fctrT-Mobile`                           NA
## `cellular.fctr1:carrier.fctrUnknown`                                  NA
## `cellular.fctrUnknown:carrier.fctrUnknown`                            NA
## `cellular.fctr1:carrier.fctrVerizon`                                  NA
## `cellular.fctrUnknown:carrier.fctrVerizon`                            NA
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2`                        NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2`                 3.406e+07
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2`                         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2`                  2.792e+07
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2`                         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2`                  2.819e+07
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2`                        NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2`                 2.135e+07
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2`                        NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2`                 2.140e+07
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2`                    NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2`             5.006e+07
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2`                       NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2`                3.477e+07
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3`                        NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3`                 4.614e+07
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3`                         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3`                  3.149e+07
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3`                         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3`                  2.990e+07
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3`                        NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3`                 2.879e+07
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3`                        NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3`                 2.876e+07
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3`                    NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3`             5.071e+07
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3`                       NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3`                3.837e+07
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4`                        NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4`                 6.170e+07
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4`                         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4`                  2.921e+07
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4`                         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4`                  3.284e+07
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4`                        NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4`                 2.787e+07
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4`                        NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4`                 2.824e+07
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4`                    NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4`             9.589e+07
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4`                       NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4`                3.906e+07
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5`                        NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5`                        NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5`                         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5`                  4.293e+07
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5`                         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5`                  3.479e+07
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5`                        NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5`                 2.490e+07
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5`                        NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5`                 1.062e+08
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5`                    NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5`                    NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5`                       NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5`                3.897e+07
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6`                        NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6`                        NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6`                         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6`                         NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6`                         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6`                  3.275e+07
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6`                        NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6`                 3.788e+07
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6`                        NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6`                        NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6`                    NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6`                    NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6`                       NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6`                3.502e+07
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7`                        NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7`                        NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7`                         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7`                         NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7`                         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7`                         NA
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7`                        NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7`                 5.025e+07
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7`                        NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7`                        NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7`                    NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7`                    NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7`                       NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7`                5.232e+07
##                                                                  z value
## (Intercept)                                                    -22794359
## D.ratio.nstopwrds.nwrds                                        -45647681
## D.terms.n.stem.stop.Ratio                                       22763438
## D.npnct01.log                                                   -4292132
## .rnorm                                                          -9642836
## storage.fctr16                                                  -8965489
## storage.fctr32                                                  -4602237
## storage.fctr64                                                  20428806
## storage.fctrUnknown                                             -6038845
## D.npnct11.log                                                   21101189
## D.npnct10.log                                                  -24148586
## D.TfIdf.sum.post.stop                                           19920661
## D.TfIdf.sum.post.stem                                          -15288767
## D.sum.TfIdf                                                           NA
## `prdl.my.descr.fctrUnknown#1`                                  -22712649
## `prdl.my.descr.fctriPad 1#0`                                   -11999998
## `prdl.my.descr.fctriPad 1#1`                                    14250000
## `prdl.my.descr.fctriPad 2#0`                                    32117430
## `prdl.my.descr.fctriPad 2#1`                                    33455148
## `prdl.my.descr.fctriPad 3+#0`                                   19367533
## `prdl.my.descr.fctriPad 3+#1`                                    1792204
## `prdl.my.descr.fctriPadAir#0`                                   -2896485
## `prdl.my.descr.fctriPadAir#1`                                    2103347
## `prdl.my.descr.fctriPadmini 2+#0`                              -13805954
## `prdl.my.descr.fctriPadmini 2+#1`                               12563668
## `prdl.my.descr.fctriPadmini#0`                                  19268065
## `prdl.my.descr.fctriPadmini#1`                                  -7738752
## D.npnct13.log                                                  -22216331
## color.fctrGold                                                 -19767596
## `color.fctrSpace Gray`                                         -60635936
## color.fctrUnknown                                              -44825118
## color.fctrWhite                                                -37477072
## D.npnct08.log                                                  -20958642
## D.npnct16.log                                                    7991997
## D.npnct24.log                                                         NA
## D.nstopwrds.log                                                 37126505
## D.npnct06.log                                                  -24085489
## D.npnct28.log                                                   16595802
## D.nuppr.log                                                      9363991
## D.npnct12.log                                                  -20471630
## D.npnct09.log                                                  -36787965
## D.ndgts.log                                                     14177512
## D.nwrds.unq.log                                                -22717560
## D.terms.n.post.stem.log                                               NA
## D.terms.n.post.stop.log                                         22667939
## D.npnct14.log                                                   -4341800
## D.terms.n.post.stem                                             22132793
## D.terms.n.post.stop                                            -21693373
## D.npnct05.log                                                  -47474833
## `condition.fctrFor parts or not working`                        -7291576
## `condition.fctrManufacturer refurbished`                         5920619
## condition.fctrNew                                              -15180688
## `condition.fctrNew other (see details)`                         26946403
## `condition.fctrSeller refurbished`                             -34177168
## idseq.my                                                       -29436240
## D.ratio.sum.TfIdf.nwrds                                         18979532
## D.TfIdf.sum.stem.stop.Ratio                                     17854462
## D.npnct15.log                                                   -8633413
## D.npnct03.log                                                    1026929
## D.nwrds.log                                                      8597257
## D.nchrs.log                                                    -10292175
## startprice.diff                                               -124676756
## biddable                                                       199579657
## cellular.fctr1                                                 -13739369
## cellular.fctrUnknown                                            -1917584
## carrier.fctrNone                                                      NA
## carrier.fctrOther                                               53751445
## carrier.fctrSprint                                              18442313
## `carrier.fctrT-Mobile`                                          -3060319
## carrier.fctrUnknown                                             -9978042
## carrier.fctrVerizon                                             29108788
## `prdl.my.descr.fctrUnknown#1:idseq.my`                          57175266
## `prdl.my.descr.fctriPad 1#0:idseq.my`                           24373715
## `prdl.my.descr.fctriPad 1#1:idseq.my`                           31414124
## `prdl.my.descr.fctriPad 2#0:idseq.my`                            7095278
## `prdl.my.descr.fctriPad 2#1:idseq.my`                            5445148
## `prdl.my.descr.fctriPad 3+#0:idseq.my`                          14221391
## `prdl.my.descr.fctriPad 3+#1:idseq.my`                          25450135
## `prdl.my.descr.fctriPadAir#0:idseq.my`                          24159017
## `prdl.my.descr.fctriPadAir#1:idseq.my`                          41074047
## `prdl.my.descr.fctriPadmini 2+#0:idseq.my`                      15721200
## `prdl.my.descr.fctriPadmini 2+#1:idseq.my`                       3603156
## `prdl.my.descr.fctriPadmini#0:idseq.my`                          9433712
## `prdl.my.descr.fctriPadmini#1:idseq.my`                          9284120
## `prdl.my.descr.fctrUnknown#1:D.ratio.sum.TfIdf.nwrds`          -25963910
## `prdl.my.descr.fctriPad 1#0:D.ratio.sum.TfIdf.nwrds`                  NA
## `prdl.my.descr.fctriPad 1#1:D.ratio.sum.TfIdf.nwrds`           -11543780
## `prdl.my.descr.fctriPad 2#0:D.ratio.sum.TfIdf.nwrds`                  NA
## `prdl.my.descr.fctriPad 2#1:D.ratio.sum.TfIdf.nwrds`           -47160319
## `prdl.my.descr.fctriPad 3+#0:D.ratio.sum.TfIdf.nwrds`                 NA
## `prdl.my.descr.fctriPad 3+#1:D.ratio.sum.TfIdf.nwrds`          -50684651
## `prdl.my.descr.fctriPadAir#0:D.ratio.sum.TfIdf.nwrds`                 NA
## `prdl.my.descr.fctriPadAir#1:D.ratio.sum.TfIdf.nwrds`          -32178024
## `prdl.my.descr.fctriPadmini 2+#0:D.ratio.sum.TfIdf.nwrds`             NA
## `prdl.my.descr.fctriPadmini 2+#1:D.ratio.sum.TfIdf.nwrds`       -8962002
## `prdl.my.descr.fctriPadmini#0:D.ratio.sum.TfIdf.nwrds`                NA
## `prdl.my.descr.fctriPadmini#1:D.ratio.sum.TfIdf.nwrds`                NA
## `prdl.my.descr.fctrUnknown#1:D.TfIdf.sum.stem.stop.Ratio`       26287065
## `prdl.my.descr.fctriPad 1#0:D.TfIdf.sum.stem.stop.Ratio`              NA
## `prdl.my.descr.fctriPad 1#1:D.TfIdf.sum.stem.stop.Ratio`        -4269560
## `prdl.my.descr.fctriPad 2#0:D.TfIdf.sum.stem.stop.Ratio`              NA
## `prdl.my.descr.fctriPad 2#1:D.TfIdf.sum.stem.stop.Ratio`       -28476869
## `prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio`             NA
## `prdl.my.descr.fctriPad 3+#1:D.TfIdf.sum.stem.stop.Ratio`       -2135938
## `prdl.my.descr.fctriPadAir#0:D.TfIdf.sum.stem.stop.Ratio`             NA
## `prdl.my.descr.fctriPadAir#1:D.TfIdf.sum.stem.stop.Ratio`         275045
## `prdl.my.descr.fctriPadmini 2+#0:D.TfIdf.sum.stem.stop.Ratio`         NA
## `prdl.my.descr.fctriPadmini 2+#1:D.TfIdf.sum.stem.stop.Ratio`   41359651
## `prdl.my.descr.fctriPadmini#0:D.TfIdf.sum.stem.stop.Ratio`            NA
## `prdl.my.descr.fctriPadmini#1:D.TfIdf.sum.stem.stop.Ratio`            NA
## `prdl.my.descr.fctrUnknown#1:D.npnct15.log`                           NA
## `prdl.my.descr.fctriPad 1#0:D.npnct15.log`                            NA
## `prdl.my.descr.fctriPad 1#1:D.npnct15.log`                      15898875
## `prdl.my.descr.fctriPad 2#0:D.npnct15.log`                            NA
## `prdl.my.descr.fctriPad 2#1:D.npnct15.log`                       6141172
## `prdl.my.descr.fctriPad 3+#0:D.npnct15.log`                           NA
## `prdl.my.descr.fctriPad 3+#1:D.npnct15.log`                     11231896
## `prdl.my.descr.fctriPadAir#0:D.npnct15.log`                           NA
## `prdl.my.descr.fctriPadAir#1:D.npnct15.log`                           NA
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct15.log`                       NA
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct15.log`                       NA
## `prdl.my.descr.fctriPadmini#0:D.npnct15.log`                          NA
## `prdl.my.descr.fctriPadmini#1:D.npnct15.log`                          NA
## `prdl.my.descr.fctrUnknown#1:D.npnct03.log`                           NA
## `prdl.my.descr.fctriPad 1#0:D.npnct03.log`                            NA
## `prdl.my.descr.fctriPad 1#1:D.npnct03.log`                     -18934461
## `prdl.my.descr.fctriPad 2#0:D.npnct03.log`                            NA
## `prdl.my.descr.fctriPad 2#1:D.npnct03.log`                     -21436269
## `prdl.my.descr.fctriPad 3+#0:D.npnct03.log`                           NA
## `prdl.my.descr.fctriPad 3+#1:D.npnct03.log`                    -18960300
## `prdl.my.descr.fctriPadAir#0:D.npnct03.log`                           NA
## `prdl.my.descr.fctriPadAir#1:D.npnct03.log`                     19422530
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct03.log`                       NA
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct03.log`                 63668025
## `prdl.my.descr.fctriPadmini#0:D.npnct03.log`                          NA
## `prdl.my.descr.fctriPadmini#1:D.npnct03.log`                          NA
## `prdl.my.descr.fctrUnknown#1:D.nwrds.log`                       15373709
## `prdl.my.descr.fctriPad 1#0:D.nwrds.log`                              NA
## `prdl.my.descr.fctriPad 1#1:D.nwrds.log`                        24935480
## `prdl.my.descr.fctriPad 2#0:D.nwrds.log`                              NA
## `prdl.my.descr.fctriPad 2#1:D.nwrds.log`                        -1082084
## `prdl.my.descr.fctriPad 3+#0:D.nwrds.log`                             NA
## `prdl.my.descr.fctriPad 3+#1:D.nwrds.log`                      -27415165
## `prdl.my.descr.fctriPadAir#0:D.nwrds.log`                             NA
## `prdl.my.descr.fctriPadAir#1:D.nwrds.log`                      -14367260
## `prdl.my.descr.fctriPadmini 2+#0:D.nwrds.log`                         NA
## `prdl.my.descr.fctriPadmini 2+#1:D.nwrds.log`                   57339679
## `prdl.my.descr.fctriPadmini#0:D.nwrds.log`                            NA
## `prdl.my.descr.fctriPadmini#1:D.nwrds.log`                            NA
## `prdl.my.descr.fctrUnknown#1:D.nchrs.log`                      -15212124
## `prdl.my.descr.fctriPad 1#0:D.nchrs.log`                              NA
## `prdl.my.descr.fctriPad 1#1:D.nchrs.log`                       -26005271
## `prdl.my.descr.fctriPad 2#0:D.nchrs.log`                              NA
## `prdl.my.descr.fctriPad 2#1:D.nchrs.log`                        -9671646
## `prdl.my.descr.fctriPad 3+#0:D.nchrs.log`                             NA
## `prdl.my.descr.fctriPad 3+#1:D.nchrs.log`                       16827353
## `prdl.my.descr.fctriPadAir#0:D.nchrs.log`                             NA
## `prdl.my.descr.fctriPadAir#1:D.nchrs.log`                        5488661
## `prdl.my.descr.fctriPadmini 2+#0:D.nchrs.log`                         NA
## `prdl.my.descr.fctriPadmini 2+#1:D.nchrs.log`                  -73082396
## `prdl.my.descr.fctriPadmini#0:D.nchrs.log`                            NA
## `prdl.my.descr.fctriPadmini#1:D.nchrs.log`                            NA
## `startprice.diff:biddable`                                    -245898336
## `cellular.fctr1:carrier.fctrNone`                                     NA
## `cellular.fctrUnknown:carrier.fctrNone`                               NA
## `cellular.fctr1:carrier.fctrOther`                                    NA
## `cellular.fctrUnknown:carrier.fctrOther`                              NA
## `cellular.fctr1:carrier.fctrSprint`                                   NA
## `cellular.fctrUnknown:carrier.fctrSprint`                             NA
## `cellular.fctr1:carrier.fctrT-Mobile`                                 NA
## `cellular.fctrUnknown:carrier.fctrT-Mobile`                           NA
## `cellular.fctr1:carrier.fctrUnknown`                                  NA
## `cellular.fctrUnknown:carrier.fctrUnknown`                            NA
## `cellular.fctr1:carrier.fctrVerizon`                                  NA
## `cellular.fctrUnknown:carrier.fctrVerizon`                            NA
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2`                        NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2`                   2797477
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2`                         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2`                  -22001461
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2`                         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2`                   29366267
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2`                        NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2`                    982334
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2`                        NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2`                 -11507771
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2`                    NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2`              20686015
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2`                       NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2`                 15999046
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3`                        NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3`                  -3102301
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3`                         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3`                   -1549282
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3`                         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3`                   -4043321
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3`                        NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3`                   8894908
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3`                        NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3`                 -21051291
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3`                    NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3`             -49026094
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3`                       NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3`                 34876349
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4`                        NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4`                  46793175
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4`                         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4`                    4838038
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4`                         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4`                   32687376
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4`                        NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4`                 -46554561
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4`                        NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4`                 -32477580
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4`                    NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4`             -50629819
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4`                       NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4`                 73626109
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5`                        NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5`                        NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5`                         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5`                   72459654
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5`                         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5`                   60008963
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5`                        NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5`                  61367816
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5`                        NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5`                 -24803447
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5`                    NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5`                    NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5`                       NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5`                 35894709
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6`                        NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6`                        NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6`                         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6`                         NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6`                         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6`                  -39137190
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6`                        NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6`                  -3297381
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6`                        NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6`                        NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6`                    NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6`                    NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6`                       NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6`                 46467723
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7`                        NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7`                        NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7`                         NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7`                         NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7`                         NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7`                         NA
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7`                        NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7`                  -2599850
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7`                        NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7`                        NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7`                    NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7`                    NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7`                       NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7`                 10534715
##                                                               Pr(>|z|)    
## (Intercept)                                                     <2e-16 ***
## D.ratio.nstopwrds.nwrds                                         <2e-16 ***
## D.terms.n.stem.stop.Ratio                                       <2e-16 ***
## D.npnct01.log                                                   <2e-16 ***
## .rnorm                                                          <2e-16 ***
## storage.fctr16                                                  <2e-16 ***
## storage.fctr32                                                  <2e-16 ***
## storage.fctr64                                                  <2e-16 ***
## storage.fctrUnknown                                             <2e-16 ***
## D.npnct11.log                                                   <2e-16 ***
## D.npnct10.log                                                   <2e-16 ***
## D.TfIdf.sum.post.stop                                           <2e-16 ***
## D.TfIdf.sum.post.stem                                           <2e-16 ***
## D.sum.TfIdf                                                         NA    
## `prdl.my.descr.fctrUnknown#1`                                   <2e-16 ***
## `prdl.my.descr.fctriPad 1#0`                                    <2e-16 ***
## `prdl.my.descr.fctriPad 1#1`                                    <2e-16 ***
## `prdl.my.descr.fctriPad 2#0`                                    <2e-16 ***
## `prdl.my.descr.fctriPad 2#1`                                    <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0`                                   <2e-16 ***
## `prdl.my.descr.fctriPad 3+#1`                                   <2e-16 ***
## `prdl.my.descr.fctriPadAir#0`                                   <2e-16 ***
## `prdl.my.descr.fctriPadAir#1`                                   <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#0`                               <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#1`                               <2e-16 ***
## `prdl.my.descr.fctriPadmini#0`                                  <2e-16 ***
## `prdl.my.descr.fctriPadmini#1`                                  <2e-16 ***
## D.npnct13.log                                                   <2e-16 ***
## color.fctrGold                                                  <2e-16 ***
## `color.fctrSpace Gray`                                          <2e-16 ***
## color.fctrUnknown                                               <2e-16 ***
## color.fctrWhite                                                 <2e-16 ***
## D.npnct08.log                                                   <2e-16 ***
## D.npnct16.log                                                   <2e-16 ***
## D.npnct24.log                                                       NA    
## D.nstopwrds.log                                                 <2e-16 ***
## D.npnct06.log                                                   <2e-16 ***
## D.npnct28.log                                                   <2e-16 ***
## D.nuppr.log                                                     <2e-16 ***
## D.npnct12.log                                                   <2e-16 ***
## D.npnct09.log                                                   <2e-16 ***
## D.ndgts.log                                                     <2e-16 ***
## D.nwrds.unq.log                                                 <2e-16 ***
## D.terms.n.post.stem.log                                             NA    
## D.terms.n.post.stop.log                                         <2e-16 ***
## D.npnct14.log                                                   <2e-16 ***
## D.terms.n.post.stem                                             <2e-16 ***
## D.terms.n.post.stop                                             <2e-16 ***
## D.npnct05.log                                                   <2e-16 ***
## `condition.fctrFor parts or not working`                        <2e-16 ***
## `condition.fctrManufacturer refurbished`                        <2e-16 ***
## condition.fctrNew                                               <2e-16 ***
## `condition.fctrNew other (see details)`                         <2e-16 ***
## `condition.fctrSeller refurbished`                              <2e-16 ***
## idseq.my                                                        <2e-16 ***
## D.ratio.sum.TfIdf.nwrds                                         <2e-16 ***
## D.TfIdf.sum.stem.stop.Ratio                                     <2e-16 ***
## D.npnct15.log                                                   <2e-16 ***
## D.npnct03.log                                                   <2e-16 ***
## D.nwrds.log                                                     <2e-16 ***
## D.nchrs.log                                                     <2e-16 ***
## startprice.diff                                                 <2e-16 ***
## biddable                                                        <2e-16 ***
## cellular.fctr1                                                  <2e-16 ***
## cellular.fctrUnknown                                            <2e-16 ***
## carrier.fctrNone                                                    NA    
## carrier.fctrOther                                               <2e-16 ***
## carrier.fctrSprint                                              <2e-16 ***
## `carrier.fctrT-Mobile`                                          <2e-16 ***
## carrier.fctrUnknown                                             <2e-16 ***
## carrier.fctrVerizon                                             <2e-16 ***
## `prdl.my.descr.fctrUnknown#1:idseq.my`                          <2e-16 ***
## `prdl.my.descr.fctriPad 1#0:idseq.my`                           <2e-16 ***
## `prdl.my.descr.fctriPad 1#1:idseq.my`                           <2e-16 ***
## `prdl.my.descr.fctriPad 2#0:idseq.my`                           <2e-16 ***
## `prdl.my.descr.fctriPad 2#1:idseq.my`                           <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:idseq.my`                          <2e-16 ***
## `prdl.my.descr.fctriPad 3+#1:idseq.my`                          <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:idseq.my`                          <2e-16 ***
## `prdl.my.descr.fctriPadAir#1:idseq.my`                          <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#0:idseq.my`                      <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#1:idseq.my`                      <2e-16 ***
## `prdl.my.descr.fctriPadmini#0:idseq.my`                         <2e-16 ***
## `prdl.my.descr.fctriPadmini#1:idseq.my`                         <2e-16 ***
## `prdl.my.descr.fctrUnknown#1:D.ratio.sum.TfIdf.nwrds`           <2e-16 ***
## `prdl.my.descr.fctriPad 1#0:D.ratio.sum.TfIdf.nwrds`                NA    
## `prdl.my.descr.fctriPad 1#1:D.ratio.sum.TfIdf.nwrds`            <2e-16 ***
## `prdl.my.descr.fctriPad 2#0:D.ratio.sum.TfIdf.nwrds`                NA    
## `prdl.my.descr.fctriPad 2#1:D.ratio.sum.TfIdf.nwrds`            <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:D.ratio.sum.TfIdf.nwrds`               NA    
## `prdl.my.descr.fctriPad 3+#1:D.ratio.sum.TfIdf.nwrds`           <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:D.ratio.sum.TfIdf.nwrds`               NA    
## `prdl.my.descr.fctriPadAir#1:D.ratio.sum.TfIdf.nwrds`           <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#0:D.ratio.sum.TfIdf.nwrds`           NA    
## `prdl.my.descr.fctriPadmini 2+#1:D.ratio.sum.TfIdf.nwrds`       <2e-16 ***
## `prdl.my.descr.fctriPadmini#0:D.ratio.sum.TfIdf.nwrds`              NA    
## `prdl.my.descr.fctriPadmini#1:D.ratio.sum.TfIdf.nwrds`              NA    
## `prdl.my.descr.fctrUnknown#1:D.TfIdf.sum.stem.stop.Ratio`       <2e-16 ***
## `prdl.my.descr.fctriPad 1#0:D.TfIdf.sum.stem.stop.Ratio`            NA    
## `prdl.my.descr.fctriPad 1#1:D.TfIdf.sum.stem.stop.Ratio`        <2e-16 ***
## `prdl.my.descr.fctriPad 2#0:D.TfIdf.sum.stem.stop.Ratio`            NA    
## `prdl.my.descr.fctriPad 2#1:D.TfIdf.sum.stem.stop.Ratio`        <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio`           NA    
## `prdl.my.descr.fctriPad 3+#1:D.TfIdf.sum.stem.stop.Ratio`       <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:D.TfIdf.sum.stem.stop.Ratio`           NA    
## `prdl.my.descr.fctriPadAir#1:D.TfIdf.sum.stem.stop.Ratio`       <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#0:D.TfIdf.sum.stem.stop.Ratio`       NA    
## `prdl.my.descr.fctriPadmini 2+#1:D.TfIdf.sum.stem.stop.Ratio`   <2e-16 ***
## `prdl.my.descr.fctriPadmini#0:D.TfIdf.sum.stem.stop.Ratio`          NA    
## `prdl.my.descr.fctriPadmini#1:D.TfIdf.sum.stem.stop.Ratio`          NA    
## `prdl.my.descr.fctrUnknown#1:D.npnct15.log`                         NA    
## `prdl.my.descr.fctriPad 1#0:D.npnct15.log`                          NA    
## `prdl.my.descr.fctriPad 1#1:D.npnct15.log`                      <2e-16 ***
## `prdl.my.descr.fctriPad 2#0:D.npnct15.log`                          NA    
## `prdl.my.descr.fctriPad 2#1:D.npnct15.log`                      <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:D.npnct15.log`                         NA    
## `prdl.my.descr.fctriPad 3+#1:D.npnct15.log`                     <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:D.npnct15.log`                         NA    
## `prdl.my.descr.fctriPadAir#1:D.npnct15.log`                         NA    
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct15.log`                     NA    
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct15.log`                     NA    
## `prdl.my.descr.fctriPadmini#0:D.npnct15.log`                        NA    
## `prdl.my.descr.fctriPadmini#1:D.npnct15.log`                        NA    
## `prdl.my.descr.fctrUnknown#1:D.npnct03.log`                         NA    
## `prdl.my.descr.fctriPad 1#0:D.npnct03.log`                          NA    
## `prdl.my.descr.fctriPad 1#1:D.npnct03.log`                      <2e-16 ***
## `prdl.my.descr.fctriPad 2#0:D.npnct03.log`                          NA    
## `prdl.my.descr.fctriPad 2#1:D.npnct03.log`                      <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:D.npnct03.log`                         NA    
## `prdl.my.descr.fctriPad 3+#1:D.npnct03.log`                     <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:D.npnct03.log`                         NA    
## `prdl.my.descr.fctriPadAir#1:D.npnct03.log`                     <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct03.log`                     NA    
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct03.log`                 <2e-16 ***
## `prdl.my.descr.fctriPadmini#0:D.npnct03.log`                        NA    
## `prdl.my.descr.fctriPadmini#1:D.npnct03.log`                        NA    
## `prdl.my.descr.fctrUnknown#1:D.nwrds.log`                       <2e-16 ***
## `prdl.my.descr.fctriPad 1#0:D.nwrds.log`                            NA    
## `prdl.my.descr.fctriPad 1#1:D.nwrds.log`                        <2e-16 ***
## `prdl.my.descr.fctriPad 2#0:D.nwrds.log`                            NA    
## `prdl.my.descr.fctriPad 2#1:D.nwrds.log`                        <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:D.nwrds.log`                           NA    
## `prdl.my.descr.fctriPad 3+#1:D.nwrds.log`                       <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:D.nwrds.log`                           NA    
## `prdl.my.descr.fctriPadAir#1:D.nwrds.log`                       <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#0:D.nwrds.log`                       NA    
## `prdl.my.descr.fctriPadmini 2+#1:D.nwrds.log`                   <2e-16 ***
## `prdl.my.descr.fctriPadmini#0:D.nwrds.log`                          NA    
## `prdl.my.descr.fctriPadmini#1:D.nwrds.log`                          NA    
## `prdl.my.descr.fctrUnknown#1:D.nchrs.log`                       <2e-16 ***
## `prdl.my.descr.fctriPad 1#0:D.nchrs.log`                            NA    
## `prdl.my.descr.fctriPad 1#1:D.nchrs.log`                        <2e-16 ***
## `prdl.my.descr.fctriPad 2#0:D.nchrs.log`                            NA    
## `prdl.my.descr.fctriPad 2#1:D.nchrs.log`                        <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:D.nchrs.log`                           NA    
## `prdl.my.descr.fctriPad 3+#1:D.nchrs.log`                       <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:D.nchrs.log`                           NA    
## `prdl.my.descr.fctriPadAir#1:D.nchrs.log`                       <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#0:D.nchrs.log`                       NA    
## `prdl.my.descr.fctriPadmini 2+#1:D.nchrs.log`                   <2e-16 ***
## `prdl.my.descr.fctriPadmini#0:D.nchrs.log`                          NA    
## `prdl.my.descr.fctriPadmini#1:D.nchrs.log`                          NA    
## `startprice.diff:biddable`                                      <2e-16 ***
## `cellular.fctr1:carrier.fctrNone`                                   NA    
## `cellular.fctrUnknown:carrier.fctrNone`                             NA    
## `cellular.fctr1:carrier.fctrOther`                                  NA    
## `cellular.fctrUnknown:carrier.fctrOther`                            NA    
## `cellular.fctr1:carrier.fctrSprint`                                 NA    
## `cellular.fctrUnknown:carrier.fctrSprint`                           NA    
## `cellular.fctr1:carrier.fctrT-Mobile`                               NA    
## `cellular.fctrUnknown:carrier.fctrT-Mobile`                         NA    
## `cellular.fctr1:carrier.fctrUnknown`                                NA    
## `cellular.fctrUnknown:carrier.fctrUnknown`                          NA    
## `cellular.fctr1:carrier.fctrVerizon`                                NA    
## `cellular.fctrUnknown:carrier.fctrVerizon`                          NA    
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2`                      NA    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2`                  <2e-16 ***
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2`                       NA    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2`                   <2e-16 ***
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2`                       NA    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2`                   <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2`                      NA    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2`                  <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2`                      NA    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2`                  <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2`                  NA    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2`              <2e-16 ***
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2`                     NA    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2`                 <2e-16 ***
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3`                      NA    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3`                  <2e-16 ***
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3`                       NA    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3`                   <2e-16 ***
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3`                       NA    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3`                   <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3`                      NA    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3`                  <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3`                      NA    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3`                  <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3`                  NA    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3`              <2e-16 ***
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3`                     NA    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3`                 <2e-16 ***
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4`                      NA    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4`                  <2e-16 ***
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4`                       NA    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4`                   <2e-16 ***
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4`                       NA    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4`                   <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4`                      NA    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4`                  <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4`                      NA    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4`                  <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4`                  NA    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4`              <2e-16 ***
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4`                     NA    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4`                 <2e-16 ***
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5`                      NA    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5`                      NA    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5`                       NA    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5`                   <2e-16 ***
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5`                       NA    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5`                   <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5`                      NA    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5`                  <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5`                      NA    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5`                  <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5`                  NA    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5`                  NA    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5`                     NA    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5`                 <2e-16 ***
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6`                      NA    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6`                      NA    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6`                       NA    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6`                       NA    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6`                       NA    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6`                   <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6`                      NA    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6`                  <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6`                      NA    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6`                      NA    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6`                  NA    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6`                  NA    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6`                     NA    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6`                 <2e-16 ***
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7`                      NA    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7`                      NA    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7`                       NA    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7`                       NA    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7`                       NA    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7`                       NA    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7`                      NA    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7`                  <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7`                      NA    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7`                      NA    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7`                  NA    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7`                  NA    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7`                     NA    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7`                 <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance:  1337.5  on 968  degrees of freedom
## Residual deviance: 14705.8  on 825  degrees of freedom
## AIC: 14994
## 
## Number of Fisher Scoring iterations: 25
## 
## [1] "    calling mypredict_mdl for fit:"
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

##    threshold   f.score
## 1        0.0 0.6313559
## 2        0.1 0.7926829
## 3        0.2 0.7926829
## 4        0.3 0.7926829
## 5        0.4 0.7926829
## 6        0.5 0.7926829
## 7        0.6 0.7926829
## 8        0.7 0.7926829
## 9        0.8 0.7926829
## 10       0.9 0.7926829
## 11       1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.9000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.All.Interact.X.glm.N
## 1         N                                    375
## 2         Y                                     57
##   sold.fctr.predict.All.Interact.X.glm.Y
## 1                                    147
## 2                                    390
##          Prediction
## Reference   N   Y
##         N 375 147
##         Y  57 390
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.894737e-01   5.824493e-01   7.624276e-01   8.147476e-01   5.386997e-01 
## AccuracyPValue  McnemarPValue 
##   1.491405e-59   4.627387e-10 
## [1] "    calling mypredict_mdl for OOB:"
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

##    threshold   f.score
## 1        0.0 0.6339217
## 2        0.1 0.6898803
## 3        0.2 0.6898803
## 4        0.3 0.6898803
## 5        0.4 0.6898803
## 6        0.5 0.6898803
## 7        0.6 0.6898803
## 8        0.7 0.6898803
## 9        0.8 0.6898803
## 10       0.9 0.6898803
## 11       1.0 0.0000000

## [1] "Classifier Probability Threshold: 0.9000 to maximize f.score.OOB"
##   sold.fctr sold.fctr.predict.All.Interact.X.glm.N
## 1         N                                    288
## 2         Y                                     96
##   sold.fctr.predict.All.Interact.X.glm.Y
## 1                                    189
## 2                                    317
##          Prediction
## Reference   N   Y
##         N 288 189
##         Y  96 317
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   6.797753e-01   3.658021e-01   6.480036e-01   7.103515e-01   5.359551e-01 
## AccuracyPValue  McnemarPValue 
##   1.782162e-18   5.048049e-08 
##             model_id model_method
## 1 All.Interact.X.glm          glm
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   feats
## 1 D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
##   max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1               1                      5.369                 2.009
##   max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1    0.795437                    0.9       0.7926829         0.747162
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1             0.7624276             0.8147476      0.488484    0.685664
##   opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1                    0.9       0.6898803        0.6797753
##   max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB min.aic.fit
## 1             0.6480036             0.7103515     0.3658021    14993.81
##   max.AccuracySD.fit max.KappaSD.fit
## 1         0.01396053      0.02568968
##                   label step_major step_minor     bgn     end elapsed
## 7      fit.models_1_glm          7          0 148.261 157.327   9.066
## 8 fit.models_1_bayesglm          8          0 157.328      NA      NA
## [1] "fitting model: All.Interact.X.bayesglm"
## [1] "    indep_vars: D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr"
## Aggregating results
## Fitting final model on full training set
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.3587  -0.5994  -0.1017   0.3551   2.6169  
## 
## Coefficients:
##                                                                 Estimate
## (Intercept)                                                   -2.653e+00
## D.ratio.nstopwrds.nwrds                                       -2.167e+00
## D.terms.n.stem.stop.Ratio                                      4.753e+00
## D.npnct01.log                                                 -2.386e-01
## .rnorm                                                         1.500e-02
## storage.fctr16                                                -6.798e-02
## storage.fctr32                                                 1.137e-01
## storage.fctr64                                                 6.562e-01
## storage.fctrUnknown                                            3.281e-01
## D.npnct11.log                                                 -7.325e-02
## D.npnct10.log                                                 -1.269e+00
## D.TfIdf.sum.post.stop                                          1.046e-01
## D.TfIdf.sum.post.stem                                          1.051e-01
## D.sum.TfIdf                                                    1.051e-01
## `prdl.my.descr.fctrUnknown#1`                                  7.537e-02
## `prdl.my.descr.fctriPad 1#0`                                  -2.070e-01
## `prdl.my.descr.fctriPad 1#1`                                  -4.760e-01
## `prdl.my.descr.fctriPad 2#0`                                   1.083e+00
## `prdl.my.descr.fctriPad 2#1`                                   5.819e-01
## `prdl.my.descr.fctriPad 3+#0`                                  5.832e-01
## `prdl.my.descr.fctriPad 3+#1`                                 -6.557e-02
## `prdl.my.descr.fctriPadAir#0`                                  3.586e-02
## `prdl.my.descr.fctriPadAir#1`                                  2.609e-01
## `prdl.my.descr.fctriPadmini 2+#0`                             -2.463e-01
## `prdl.my.descr.fctriPadmini 2+#1`                              1.483e-01
## `prdl.my.descr.fctriPadmini#0`                                 1.832e-01
## `prdl.my.descr.fctriPadmini#1`                                -3.643e-01
## D.npnct13.log                                                 -3.907e-01
## color.fctrGold                                                -8.959e-02
## `color.fctrSpace Gray`                                        -3.180e-01
## color.fctrUnknown                                             -3.474e-01
## color.fctrWhite                                               -8.177e-02
## D.npnct08.log                                                 -1.418e-01
## D.npnct16.log                                                  1.565e+00
## D.npnct24.log                                                  2.079e-01
## D.nstopwrds.log                                                2.343e-01
## D.npnct06.log                                                 -3.352e+00
## D.npnct28.log                                                 -3.704e-02
## D.nuppr.log                                                   -1.057e-01
## D.npnct12.log                                                 -1.324e+00
## D.npnct09.log                                                 -2.066e+00
## D.ndgts.log                                                    4.877e-01
## D.nwrds.unq.log                                               -2.573e-01
## D.terms.n.post.stem.log                                       -2.573e-01
## D.terms.n.post.stop.log                                       -2.581e-01
## D.npnct14.log                                                 -7.400e-01
## D.terms.n.post.stem                                           -6.054e-02
## D.terms.n.post.stop                                           -6.215e-02
## D.npnct05.log                                                 -2.834e+00
## `condition.fctrFor parts or not working`                       1.488e-01
## `condition.fctrManufacturer refurbished`                       3.869e-01
## condition.fctrNew                                             -3.767e-01
## `condition.fctrNew other (see details)`                        5.646e-01
## `condition.fctrSeller refurbished`                            -4.871e-01
## idseq.my                                                      -2.957e-04
## D.ratio.sum.TfIdf.nwrds                                       -6.310e-01
## D.TfIdf.sum.stem.stop.Ratio                                   -1.055e+00
## D.npnct15.log                                                  2.708e+00
## D.npnct03.log                                                  2.293e+00
## D.nwrds.log                                                   -3.197e-02
## D.nchrs.log                                                   -5.459e-02
## startprice.diff                                               -5.199e-03
## biddable                                                       4.116e+00
## cellular.fctr1                                                -4.187e-02
## cellular.fctrUnknown                                          -9.672e-02
## carrier.fctrNone                                               1.249e-01
## carrier.fctrOther                                              6.708e-01
## carrier.fctrSprint                                             1.650e-01
## `carrier.fctrT-Mobile`                                        -2.979e-01
## carrier.fctrUnknown                                           -1.697e-01
## carrier.fctrVerizon                                            2.657e-01
## `prdl.my.descr.fctrUnknown#1:idseq.my`                         1.470e-03
## `prdl.my.descr.fctriPad 1#0:idseq.my`                          7.129e-04
## `prdl.my.descr.fctriPad 1#1:idseq.my`                          5.414e-04
## `prdl.my.descr.fctriPad 2#0:idseq.my`                         -8.161e-04
## `prdl.my.descr.fctriPad 2#1:idseq.my`                          7.509e-05
## `prdl.my.descr.fctriPad 3+#0:idseq.my`                        -1.910e-04
## `prdl.my.descr.fctriPad 3+#1:idseq.my`                         5.453e-04
## `prdl.my.descr.fctriPadAir#0:idseq.my`                         2.623e-04
## `prdl.my.descr.fctriPadAir#1:idseq.my`                         5.057e-04
## `prdl.my.descr.fctriPadmini 2+#0:idseq.my`                     7.326e-04
## `prdl.my.descr.fctriPadmini 2+#1:idseq.my`                     7.196e-04
## `prdl.my.descr.fctriPadmini#0:idseq.my`                        5.693e-05
## `prdl.my.descr.fctriPadmini#1:idseq.my`                       -8.027e-05
## `prdl.my.descr.fctrUnknown#1:D.ratio.sum.TfIdf.nwrds`         -6.718e-01
## `prdl.my.descr.fctriPad 1#0:D.ratio.sum.TfIdf.nwrds`           0.000e+00
## `prdl.my.descr.fctriPad 1#1:D.ratio.sum.TfIdf.nwrds`           1.531e+00
## `prdl.my.descr.fctriPad 2#0:D.ratio.sum.TfIdf.nwrds`           0.000e+00
## `prdl.my.descr.fctriPad 2#1:D.ratio.sum.TfIdf.nwrds`          -1.559e+00
## `prdl.my.descr.fctriPad 3+#0:D.ratio.sum.TfIdf.nwrds`          0.000e+00
## `prdl.my.descr.fctriPad 3+#1:D.ratio.sum.TfIdf.nwrds`         -7.664e-01
## `prdl.my.descr.fctriPadAir#0:D.ratio.sum.TfIdf.nwrds`          0.000e+00
## `prdl.my.descr.fctriPadAir#1:D.ratio.sum.TfIdf.nwrds`         -7.247e-01
## `prdl.my.descr.fctriPadmini 2+#0:D.ratio.sum.TfIdf.nwrds`      0.000e+00
## `prdl.my.descr.fctriPadmini 2+#1:D.ratio.sum.TfIdf.nwrds`     -2.776e+00
## `prdl.my.descr.fctriPadmini#0:D.ratio.sum.TfIdf.nwrds`         0.000e+00
## `prdl.my.descr.fctriPadmini#1:D.ratio.sum.TfIdf.nwrds`         9.971e-01
## `prdl.my.descr.fctrUnknown#1:D.TfIdf.sum.stem.stop.Ratio`      1.237e+00
## `prdl.my.descr.fctriPad 1#0:D.TfIdf.sum.stem.stop.Ratio`      -2.070e-01
## `prdl.my.descr.fctriPad 1#1:D.TfIdf.sum.stem.stop.Ratio`      -1.298e+00
## `prdl.my.descr.fctriPad 2#0:D.TfIdf.sum.stem.stop.Ratio`       1.083e+00
## `prdl.my.descr.fctriPad 2#1:D.TfIdf.sum.stem.stop.Ratio`       1.266e-01
## `prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio`      5.832e-01
## `prdl.my.descr.fctriPad 3+#1:D.TfIdf.sum.stem.stop.Ratio`     -3.667e-03
## `prdl.my.descr.fctriPadAir#0:D.TfIdf.sum.stem.stop.Ratio`      3.586e-02
## `prdl.my.descr.fctriPadAir#1:D.TfIdf.sum.stem.stop.Ratio`      1.157e+00
## `prdl.my.descr.fctriPadmini 2+#0:D.TfIdf.sum.stem.stop.Ratio` -2.463e-01
## `prdl.my.descr.fctriPadmini 2+#1:D.TfIdf.sum.stem.stop.Ratio` -1.472e+00
## `prdl.my.descr.fctriPadmini#0:D.TfIdf.sum.stem.stop.Ratio`     1.832e-01
## `prdl.my.descr.fctriPadmini#1:D.TfIdf.sum.stem.stop.Ratio`    -2.060e-01
## `prdl.my.descr.fctrUnknown#1:D.npnct15.log`                    0.000e+00
## `prdl.my.descr.fctriPad 1#0:D.npnct15.log`                     0.000e+00
## `prdl.my.descr.fctriPad 1#1:D.npnct15.log`                     3.900e+00
## `prdl.my.descr.fctriPad 2#0:D.npnct15.log`                     0.000e+00
## `prdl.my.descr.fctriPad 2#1:D.npnct15.log`                     2.195e-01
## `prdl.my.descr.fctriPad 3+#0:D.npnct15.log`                    0.000e+00
## `prdl.my.descr.fctriPad 3+#1:D.npnct15.log`                   -2.244e+00
## `prdl.my.descr.fctriPadAir#0:D.npnct15.log`                    0.000e+00
## `prdl.my.descr.fctriPadAir#1:D.npnct15.log`                    5.343e-02
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct15.log`                0.000e+00
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct15.log`                0.000e+00
## `prdl.my.descr.fctriPadmini#0:D.npnct15.log`                   0.000e+00
## `prdl.my.descr.fctriPadmini#1:D.npnct15.log`                   0.000e+00
## `prdl.my.descr.fctrUnknown#1:D.npnct03.log`                    0.000e+00
## `prdl.my.descr.fctriPad 1#0:D.npnct03.log`                     0.000e+00
## `prdl.my.descr.fctriPad 1#1:D.npnct03.log`                    -3.742e+00
## `prdl.my.descr.fctriPad 2#0:D.npnct03.log`                     0.000e+00
## `prdl.my.descr.fctriPad 2#1:D.npnct03.log`                    -5.586e+00
## `prdl.my.descr.fctriPad 3+#0:D.npnct03.log`                    0.000e+00
## `prdl.my.descr.fctriPad 3+#1:D.npnct03.log`                   -1.920e+00
## `prdl.my.descr.fctriPadAir#0:D.npnct03.log`                    0.000e+00
## `prdl.my.descr.fctriPadAir#1:D.npnct03.log`                    1.119e+00
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct03.log`                0.000e+00
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct03.log`                5.254e-01
## `prdl.my.descr.fctriPadmini#0:D.npnct03.log`                   0.000e+00
## `prdl.my.descr.fctriPadmini#1:D.npnct03.log`                  -1.921e+00
## `prdl.my.descr.fctrUnknown#1:D.nwrds.log`                     -4.809e-01
## `prdl.my.descr.fctriPad 1#0:D.nwrds.log`                       0.000e+00
## `prdl.my.descr.fctriPad 1#1:D.nwrds.log`                       5.513e-01
## `prdl.my.descr.fctriPad 2#0:D.nwrds.log`                       0.000e+00
## `prdl.my.descr.fctriPad 2#1:D.nwrds.log`                      -1.688e-01
## `prdl.my.descr.fctriPad 3+#0:D.nwrds.log`                      0.000e+00
## `prdl.my.descr.fctriPad 3+#1:D.nwrds.log`                     -1.856e-01
## `prdl.my.descr.fctriPadAir#0:D.nwrds.log`                      0.000e+00
## `prdl.my.descr.fctriPadAir#1:D.nwrds.log`                     -1.956e-01
## `prdl.my.descr.fctriPadmini 2+#0:D.nwrds.log`                  0.000e+00
## `prdl.my.descr.fctriPadmini 2+#1:D.nwrds.log`                  8.057e-01
## `prdl.my.descr.fctriPadmini#0:D.nwrds.log`                     0.000e+00
## `prdl.my.descr.fctriPadmini#1:D.nwrds.log`                     2.046e-01
## `prdl.my.descr.fctrUnknown#1:D.nchrs.log`                     -2.509e-01
## `prdl.my.descr.fctriPad 1#0:D.nchrs.log`                       0.000e+00
## `prdl.my.descr.fctriPad 1#1:D.nchrs.log`                      -5.761e-02
## `prdl.my.descr.fctriPad 2#0:D.nchrs.log`                       0.000e+00
## `prdl.my.descr.fctriPad 2#1:D.nchrs.log`                       1.331e-01
## `prdl.my.descr.fctriPad 3+#0:D.nchrs.log`                      0.000e+00
## `prdl.my.descr.fctriPad 3+#1:D.nchrs.log`                      4.468e-02
## `prdl.my.descr.fctriPadAir#0:D.nchrs.log`                      0.000e+00
## `prdl.my.descr.fctriPadAir#1:D.nchrs.log`                     -1.128e-01
## `prdl.my.descr.fctriPadmini 2+#0:D.nchrs.log`                  0.000e+00
## `prdl.my.descr.fctriPadmini 2+#1:D.nchrs.log`                  1.140e-01
## `prdl.my.descr.fctriPadmini#0:D.nchrs.log`                     0.000e+00
## `prdl.my.descr.fctriPadmini#1:D.nchrs.log`                    -1.484e-01
## `startprice.diff:biddable`                                    -3.383e-02
## `cellular.fctr1:carrier.fctrNone`                              0.000e+00
## `cellular.fctrUnknown:carrier.fctrNone`                        0.000e+00
## `cellular.fctr1:carrier.fctrOther`                             6.708e-01
## `cellular.fctrUnknown:carrier.fctrOther`                       0.000e+00
## `cellular.fctr1:carrier.fctrSprint`                            1.650e-01
## `cellular.fctrUnknown:carrier.fctrSprint`                      0.000e+00
## `cellular.fctr1:carrier.fctrT-Mobile`                         -2.979e-01
## `cellular.fctrUnknown:carrier.fctrT-Mobile`                    0.000e+00
## `cellular.fctr1:carrier.fctrUnknown`                          -8.627e-02
## `cellular.fctrUnknown:carrier.fctrUnknown`                    -9.672e-02
## `cellular.fctr1:carrier.fctrVerizon`                           2.657e-01
## `cellular.fctrUnknown:carrier.fctrVerizon`                     0.000e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2`                 0.000e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2`                 5.553e-01
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2`                  0.000e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2`                 -3.964e-01
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2`                  0.000e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2`                  1.845e-01
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2`                 0.000e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2`                 6.181e-01
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2`                 0.000e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2`                 1.487e-02
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2`             0.000e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2`            -2.392e-01
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2`                0.000e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2`                1.487e-01
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3`                 0.000e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3`                 4.087e-01
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3`                  0.000e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3`                 -3.567e-01
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3`                  0.000e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3`                 -2.653e-01
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3`                 0.000e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3`                 1.722e-01
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3`                 0.000e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3`                -4.137e-01
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3`             0.000e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3`            -9.796e-01
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3`                0.000e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3`                8.492e-01
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4`                 0.000e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4`                 1.353e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4`                  0.000e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4`                 -1.805e-01
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4`                  0.000e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4`                  9.363e-01
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4`                 0.000e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4`                -1.524e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4`                 0.000e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4`                -1.162e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4`             0.000e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4`            -3.322e-01
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4`                0.000e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4`                2.879e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5`                 0.000e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5`                 0.000e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5`                  0.000e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5`                  9.492e-01
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5`                  0.000e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5`                  1.138e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5`                 0.000e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5`                 2.090e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5`                 0.000e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5`                 9.379e-02
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5`             0.000e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5`             0.000e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5`                0.000e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5`                1.415e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6`                 0.000e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6`                 0.000e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6`                  0.000e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6`                  0.000e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6`                  0.000e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6`                 -9.311e-01
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6`                 0.000e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6`                 3.186e-01
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6`                 0.000e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6`                 0.000e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6`             0.000e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6`             0.000e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6`                0.000e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6`                2.961e-01
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7`                 0.000e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7`                 0.000e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7`                  0.000e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7`                  0.000e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7`                  0.000e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7`                  0.000e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7`                 0.000e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7`                -7.718e-01
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7`                 0.000e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7`                 0.000e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7`             0.000e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7`             0.000e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7`                0.000e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7`               -2.535e-01
##                                                               Std. Error
## (Intercept)                                                    8.232e+00
## D.ratio.nstopwrds.nwrds                                        2.607e+00
## D.terms.n.stem.stop.Ratio                                      6.670e+00
## D.npnct01.log                                                  8.154e-01
## .rnorm                                                         1.032e-01
## storage.fctr16                                                 4.882e-01
## storage.fctr32                                                 5.223e-01
## storage.fctr64                                                 5.132e-01
## storage.fctrUnknown                                            6.475e-01
## D.npnct11.log                                                  4.297e-01
## D.npnct10.log                                                  1.820e+00
## D.TfIdf.sum.post.stop                                          3.031e-01
## D.TfIdf.sum.post.stem                                          3.171e-01
## D.sum.TfIdf                                                    3.171e-01
## `prdl.my.descr.fctrUnknown#1`                                  2.310e+00
## `prdl.my.descr.fctriPad 1#0`                                   1.540e+00
## `prdl.my.descr.fctriPad 1#1`                                   2.311e+00
## `prdl.my.descr.fctriPad 2#0`                                   1.817e+00
## `prdl.my.descr.fctriPad 2#1`                                   2.272e+00
## `prdl.my.descr.fctriPad 3+#0`                                  1.566e+00
## `prdl.my.descr.fctriPad 3+#1`                                  2.205e+00
## `prdl.my.descr.fctriPadAir#0`                                  1.527e+00
## `prdl.my.descr.fctriPadAir#1`                                  2.228e+00
## `prdl.my.descr.fctriPadmini 2+#0`                              1.541e+00
## `prdl.my.descr.fctriPadmini 2+#1`                              2.430e+00
## `prdl.my.descr.fctriPadmini#0`                                 1.534e+00
## `prdl.my.descr.fctriPadmini#1`                                 2.305e+00
## D.npnct13.log                                                  4.233e-01
## color.fctrGold                                                 5.366e-01
## `color.fctrSpace Gray`                                         3.978e-01
## color.fctrUnknown                                              2.965e-01
## color.fctrWhite                                                3.167e-01
## D.npnct08.log                                                  8.697e-01
## D.npnct16.log                                                  2.176e+00
## D.npnct24.log                                                  2.892e+00
## D.nstopwrds.log                                                7.274e-01
## D.npnct06.log                                                  2.328e+00
## D.npnct28.log                                                  2.221e+00
## D.nuppr.log                                                    5.276e-01
## D.npnct12.log                                                  9.530e-01
## D.npnct09.log                                                  6.909e+00
## D.ndgts.log                                                    4.726e-01
## D.nwrds.unq.log                                                1.050e+00
## D.terms.n.post.stem.log                                        1.050e+00
## D.terms.n.post.stop.log                                        1.047e+00
## D.npnct14.log                                                  9.969e-01
## D.terms.n.post.stem                                            2.012e-01
## D.terms.n.post.stop                                            1.990e-01
## D.npnct05.log                                                  1.651e+00
## `condition.fctrFor parts or not working`                       4.368e-01
## `condition.fctrManufacturer refurbished`                       5.551e-01
## condition.fctrNew                                              3.257e-01
## `condition.fctrNew other (see details)`                        4.835e-01
## `condition.fctrSeller refurbished`                             5.210e-01
## idseq.my                                                       5.243e-04
## D.ratio.sum.TfIdf.nwrds                                        1.319e+00
## D.TfIdf.sum.stem.stop.Ratio                                    5.258e+00
## D.npnct15.log                                                  5.775e+00
## D.npnct03.log                                                  2.692e+00
## D.nwrds.log                                                    8.200e-01
## D.nchrs.log                                                    5.117e-01
## startprice.diff                                                1.586e-03
## biddable                                                       3.079e-01
## cellular.fctr1                                                 1.316e+00
## cellular.fctrUnknown                                           1.751e+00
## carrier.fctrNone                                               1.316e+00
## carrier.fctrOther                                              2.079e+00
## carrier.fctrSprint                                             1.523e+00
## `carrier.fctrT-Mobile`                                         1.561e+00
## carrier.fctrUnknown                                            1.332e+00
## carrier.fctrVerizon                                            1.478e+00
## `prdl.my.descr.fctrUnknown#1:idseq.my`                         1.337e-03
## `prdl.my.descr.fctriPad 1#0:idseq.my`                          8.322e-04
## `prdl.my.descr.fctriPad 1#1:idseq.my`                          9.228e-04
## `prdl.my.descr.fctriPad 2#0:idseq.my`                          1.267e-03
## `prdl.my.descr.fctriPad 2#1:idseq.my`                          8.597e-04
## `prdl.my.descr.fctriPad 3+#0:idseq.my`                         7.783e-04
## `prdl.my.descr.fctriPad 3+#1:idseq.my`                         7.792e-04
## `prdl.my.descr.fctriPadAir#0:idseq.my`                         7.210e-04
## `prdl.my.descr.fctriPadAir#1:idseq.my`                         8.250e-04
## `prdl.my.descr.fctriPadmini 2+#0:idseq.my`                     7.506e-04
## `prdl.my.descr.fctriPadmini 2+#1:idseq.my`                     1.855e-03
## `prdl.my.descr.fctriPadmini#0:idseq.my`                        7.260e-04
## `prdl.my.descr.fctriPadmini#1:idseq.my`                        1.007e-03
## `prdl.my.descr.fctrUnknown#1:D.ratio.sum.TfIdf.nwrds`          1.636e+00
## `prdl.my.descr.fctriPad 1#0:D.ratio.sum.TfIdf.nwrds`           2.500e+00
## `prdl.my.descr.fctriPad 1#1:D.ratio.sum.TfIdf.nwrds`           1.981e+00
## `prdl.my.descr.fctriPad 2#0:D.ratio.sum.TfIdf.nwrds`           2.500e+00
## `prdl.my.descr.fctriPad 2#1:D.ratio.sum.TfIdf.nwrds`           1.637e+00
## `prdl.my.descr.fctriPad 3+#0:D.ratio.sum.TfIdf.nwrds`          2.500e+00
## `prdl.my.descr.fctriPad 3+#1:D.ratio.sum.TfIdf.nwrds`          1.637e+00
## `prdl.my.descr.fctriPadAir#0:D.ratio.sum.TfIdf.nwrds`          2.500e+00
## `prdl.my.descr.fctriPadAir#1:D.ratio.sum.TfIdf.nwrds`          1.566e+00
## `prdl.my.descr.fctriPadmini 2+#0:D.ratio.sum.TfIdf.nwrds`      2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:D.ratio.sum.TfIdf.nwrds`      3.884e+00
## `prdl.my.descr.fctriPadmini#0:D.ratio.sum.TfIdf.nwrds`         2.500e+00
## `prdl.my.descr.fctriPadmini#1:D.ratio.sum.TfIdf.nwrds`         2.003e+00
## `prdl.my.descr.fctrUnknown#1:D.TfIdf.sum.stem.stop.Ratio`      4.194e+00
## `prdl.my.descr.fctriPad 1#0:D.TfIdf.sum.stem.stop.Ratio`       1.540e+00
## `prdl.my.descr.fctriPad 1#1:D.TfIdf.sum.stem.stop.Ratio`       3.829e+00
## `prdl.my.descr.fctriPad 2#0:D.TfIdf.sum.stem.stop.Ratio`       1.817e+00
## `prdl.my.descr.fctriPad 2#1:D.TfIdf.sum.stem.stop.Ratio`       3.330e+00
## `prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio`      1.566e+00
## `prdl.my.descr.fctriPad 3+#1:D.TfIdf.sum.stem.stop.Ratio`      3.107e+00
## `prdl.my.descr.fctriPadAir#0:D.TfIdf.sum.stem.stop.Ratio`      1.527e+00
## `prdl.my.descr.fctriPadAir#1:D.TfIdf.sum.stem.stop.Ratio`      3.302e+00
## `prdl.my.descr.fctriPadmini 2+#0:D.TfIdf.sum.stem.stop.Ratio`  1.541e+00
## `prdl.my.descr.fctriPadmini 2+#1:D.TfIdf.sum.stem.stop.Ratio`  6.760e+00
## `prdl.my.descr.fctriPadmini#0:D.TfIdf.sum.stem.stop.Ratio`     1.534e+00
## `prdl.my.descr.fctriPadmini#1:D.TfIdf.sum.stem.stop.Ratio`     3.890e+00
## `prdl.my.descr.fctrUnknown#1:D.npnct15.log`                    2.500e+00
## `prdl.my.descr.fctriPad 1#0:D.npnct15.log`                     2.500e+00
## `prdl.my.descr.fctriPad 1#1:D.npnct15.log`                     9.576e+00
## `prdl.my.descr.fctriPad 2#0:D.npnct15.log`                     2.500e+00
## `prdl.my.descr.fctriPad 2#1:D.npnct15.log`                     3.330e+00
## `prdl.my.descr.fctriPad 3+#0:D.npnct15.log`                    2.500e+00
## `prdl.my.descr.fctriPad 3+#1:D.npnct15.log`                    5.879e+00
## `prdl.my.descr.fctriPadAir#0:D.npnct15.log`                    2.500e+00
## `prdl.my.descr.fctriPadAir#1:D.npnct15.log`                    3.493e+00
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct15.log`                2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct15.log`                2.500e+00
## `prdl.my.descr.fctriPadmini#0:D.npnct15.log`                   2.500e+00
## `prdl.my.descr.fctriPadmini#1:D.npnct15.log`                   2.500e+00
## `prdl.my.descr.fctrUnknown#1:D.npnct03.log`                    2.500e+00
## `prdl.my.descr.fctriPad 1#0:D.npnct03.log`                     2.500e+00
## `prdl.my.descr.fctriPad 1#1:D.npnct03.log`                     2.970e+00
## `prdl.my.descr.fctriPad 2#0:D.npnct03.log`                     2.500e+00
## `prdl.my.descr.fctriPad 2#1:D.npnct03.log`                     3.083e+00
## `prdl.my.descr.fctriPad 3+#0:D.npnct03.log`                    2.500e+00
## `prdl.my.descr.fctriPad 3+#1:D.npnct03.log`                    2.817e+00
## `prdl.my.descr.fctriPadAir#0:D.npnct03.log`                    2.500e+00
## `prdl.my.descr.fctriPadAir#1:D.npnct03.log`                    2.427e+00
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct03.log`                2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct03.log`                1.990e+00
## `prdl.my.descr.fctriPadmini#0:D.npnct03.log`                   2.500e+00
## `prdl.my.descr.fctriPadmini#1:D.npnct03.log`                   3.390e+00
## `prdl.my.descr.fctrUnknown#1:D.nwrds.log`                      1.650e+00
## `prdl.my.descr.fctriPad 1#0:D.nwrds.log`                       2.500e+00
## `prdl.my.descr.fctriPad 1#1:D.nwrds.log`                       1.365e+00
## `prdl.my.descr.fctriPad 2#0:D.nwrds.log`                       2.500e+00
## `prdl.my.descr.fctriPad 2#1:D.nwrds.log`                       1.147e+00
## `prdl.my.descr.fctriPad 3+#0:D.nwrds.log`                      2.500e+00
## `prdl.my.descr.fctriPad 3+#1:D.nwrds.log`                      1.098e+00
## `prdl.my.descr.fctriPadAir#0:D.nwrds.log`                      2.500e+00
## `prdl.my.descr.fctriPadAir#1:D.nwrds.log`                      1.130e+00
## `prdl.my.descr.fctriPadmini 2+#0:D.nwrds.log`                  2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:D.nwrds.log`                  2.338e+00
## `prdl.my.descr.fctriPadmini#0:D.nwrds.log`                     2.500e+00
## `prdl.my.descr.fctriPadmini#1:D.nwrds.log`                     1.377e+00
## `prdl.my.descr.fctrUnknown#1:D.nchrs.log`                      1.141e+00
## `prdl.my.descr.fctriPad 1#0:D.nchrs.log`                       2.500e+00
## `prdl.my.descr.fctriPad 1#1:D.nchrs.log`                       9.179e-01
## `prdl.my.descr.fctriPad 2#0:D.nchrs.log`                       2.500e+00
## `prdl.my.descr.fctriPad 2#1:D.nchrs.log`                       8.052e-01
## `prdl.my.descr.fctriPad 3+#0:D.nchrs.log`                      2.500e+00
## `prdl.my.descr.fctriPad 3+#1:D.nchrs.log`                      7.224e-01
## `prdl.my.descr.fctriPadAir#0:D.nchrs.log`                      2.500e+00
## `prdl.my.descr.fctriPadAir#1:D.nchrs.log`                      7.662e-01
## `prdl.my.descr.fctriPadmini 2+#0:D.nchrs.log`                  2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:D.nchrs.log`                  1.576e+00
## `prdl.my.descr.fctriPadmini#0:D.nchrs.log`                     2.500e+00
## `prdl.my.descr.fctriPadmini#1:D.nchrs.log`                     9.482e-01
## `startprice.diff:biddable`                                     4.868e-03
## `cellular.fctr1:carrier.fctrNone`                              2.500e+00
## `cellular.fctrUnknown:carrier.fctrNone`                        2.500e+00
## `cellular.fctr1:carrier.fctrOther`                             2.079e+00
## `cellular.fctrUnknown:carrier.fctrOther`                       2.500e+00
## `cellular.fctr1:carrier.fctrSprint`                            1.523e+00
## `cellular.fctrUnknown:carrier.fctrSprint`                      2.500e+00
## `cellular.fctr1:carrier.fctrT-Mobile`                          1.561e+00
## `cellular.fctrUnknown:carrier.fctrT-Mobile`                    2.500e+00
## `cellular.fctr1:carrier.fctrUnknown`                           1.343e+00
## `cellular.fctrUnknown:carrier.fctrUnknown`                     1.751e+00
## `cellular.fctr1:carrier.fctrVerizon`                           1.478e+00
## `cellular.fctrUnknown:carrier.fctrVerizon`                     2.500e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2`                 2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2`                 9.924e-01
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2`                  2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2`                  9.688e-01
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2`                  2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2`                  9.159e-01
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2`                 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2`                 8.271e-01
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2`                 2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2`                 7.971e-01
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2`             2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2`             1.455e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2`                2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2`                1.138e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3`                 2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3`                 1.241e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3`                  2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3`                  1.154e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3`                  2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3`                  1.100e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3`                 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3`                 1.142e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3`                 2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3`                 1.098e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3`             2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3`             1.532e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3`                2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3`                1.133e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4`                 2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4`                 1.681e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4`                  2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4`                  1.201e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4`                  2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4`                  9.894e-01
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4`                 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4`                 1.606e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4`                 2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4`                 1.080e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4`             2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4`             2.039e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4`                2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4`                1.838e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5`                 2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5`                 2.500e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5`                  2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5`                  1.836e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5`                  2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5`                  1.141e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5`                 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5`                 8.804e-01
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5`                 2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5`                 1.807e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5`             2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5`             2.500e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5`                2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5`                1.215e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6`                 2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6`                 2.500e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6`                  2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6`                  2.500e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6`                  2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6`                  1.217e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6`                 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6`                 1.385e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6`                 2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6`                 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6`             2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6`             2.500e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6`                2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6`                1.308e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7`                 2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7`                 2.500e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7`                  2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7`                  2.500e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7`                  2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7`                  2.500e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7`                 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7`                 1.768e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7`                 2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7`                 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7`             2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7`             2.500e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7`                2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7`                1.713e+00
##                                                               z value
## (Intercept)                                                    -0.322
## D.ratio.nstopwrds.nwrds                                        -0.831
## D.terms.n.stem.stop.Ratio                                       0.713
## D.npnct01.log                                                  -0.293
## .rnorm                                                          0.145
## storage.fctr16                                                 -0.139
## storage.fctr32                                                  0.218
## storage.fctr64                                                  1.279
## storage.fctrUnknown                                             0.507
## D.npnct11.log                                                  -0.170
## D.npnct10.log                                                  -0.697
## D.TfIdf.sum.post.stop                                           0.345
## D.TfIdf.sum.post.stem                                           0.331
## D.sum.TfIdf                                                     0.331
## `prdl.my.descr.fctrUnknown#1`                                   0.033
## `prdl.my.descr.fctriPad 1#0`                                   -0.134
## `prdl.my.descr.fctriPad 1#1`                                   -0.206
## `prdl.my.descr.fctriPad 2#0`                                    0.596
## `prdl.my.descr.fctriPad 2#1`                                    0.256
## `prdl.my.descr.fctriPad 3+#0`                                   0.372
## `prdl.my.descr.fctriPad 3+#1`                                  -0.030
## `prdl.my.descr.fctriPadAir#0`                                   0.023
## `prdl.my.descr.fctriPadAir#1`                                   0.117
## `prdl.my.descr.fctriPadmini 2+#0`                              -0.160
## `prdl.my.descr.fctriPadmini 2+#1`                               0.061
## `prdl.my.descr.fctriPadmini#0`                                  0.119
## `prdl.my.descr.fctriPadmini#1`                                 -0.158
## D.npnct13.log                                                  -0.923
## color.fctrGold                                                 -0.167
## `color.fctrSpace Gray`                                         -0.800
## color.fctrUnknown                                              -1.171
## color.fctrWhite                                                -0.258
## D.npnct08.log                                                  -0.163
## D.npnct16.log                                                   0.719
## D.npnct24.log                                                   0.072
## D.nstopwrds.log                                                 0.322
## D.npnct06.log                                                  -1.440
## D.npnct28.log                                                  -0.017
## D.nuppr.log                                                    -0.200
## D.npnct12.log                                                  -1.390
## D.npnct09.log                                                  -0.299
## D.ndgts.log                                                     1.032
## D.nwrds.unq.log                                                -0.245
## D.terms.n.post.stem.log                                        -0.245
## D.terms.n.post.stop.log                                        -0.247
## D.npnct14.log                                                  -0.742
## D.terms.n.post.stem                                            -0.301
## D.terms.n.post.stop                                            -0.312
## D.npnct05.log                                                  -1.716
## `condition.fctrFor parts or not working`                        0.341
## `condition.fctrManufacturer refurbished`                        0.697
## condition.fctrNew                                              -1.157
## `condition.fctrNew other (see details)`                         1.168
## `condition.fctrSeller refurbished`                             -0.935
## idseq.my                                                       -0.564
## D.ratio.sum.TfIdf.nwrds                                        -0.478
## D.TfIdf.sum.stem.stop.Ratio                                    -0.201
## D.npnct15.log                                                   0.469
## D.npnct03.log                                                   0.852
## D.nwrds.log                                                    -0.039
## D.nchrs.log                                                    -0.107
## startprice.diff                                                -3.278
## biddable                                                       13.368
## cellular.fctr1                                                 -0.032
## cellular.fctrUnknown                                           -0.055
## carrier.fctrNone                                                0.095
## carrier.fctrOther                                               0.323
## carrier.fctrSprint                                              0.108
## `carrier.fctrT-Mobile`                                         -0.191
## carrier.fctrUnknown                                            -0.127
## carrier.fctrVerizon                                             0.180
## `prdl.my.descr.fctrUnknown#1:idseq.my`                          1.100
## `prdl.my.descr.fctriPad 1#0:idseq.my`                           0.857
## `prdl.my.descr.fctriPad 1#1:idseq.my`                           0.587
## `prdl.my.descr.fctriPad 2#0:idseq.my`                          -0.644
## `prdl.my.descr.fctriPad 2#1:idseq.my`                           0.087
## `prdl.my.descr.fctriPad 3+#0:idseq.my`                         -0.245
## `prdl.my.descr.fctriPad 3+#1:idseq.my`                          0.700
## `prdl.my.descr.fctriPadAir#0:idseq.my`                          0.364
## `prdl.my.descr.fctriPadAir#1:idseq.my`                          0.613
## `prdl.my.descr.fctriPadmini 2+#0:idseq.my`                      0.976
## `prdl.my.descr.fctriPadmini 2+#1:idseq.my`                      0.388
## `prdl.my.descr.fctriPadmini#0:idseq.my`                         0.078
## `prdl.my.descr.fctriPadmini#1:idseq.my`                        -0.080
## `prdl.my.descr.fctrUnknown#1:D.ratio.sum.TfIdf.nwrds`          -0.411
## `prdl.my.descr.fctriPad 1#0:D.ratio.sum.TfIdf.nwrds`            0.000
## `prdl.my.descr.fctriPad 1#1:D.ratio.sum.TfIdf.nwrds`            0.773
## `prdl.my.descr.fctriPad 2#0:D.ratio.sum.TfIdf.nwrds`            0.000
## `prdl.my.descr.fctriPad 2#1:D.ratio.sum.TfIdf.nwrds`           -0.952
## `prdl.my.descr.fctriPad 3+#0:D.ratio.sum.TfIdf.nwrds`           0.000
## `prdl.my.descr.fctriPad 3+#1:D.ratio.sum.TfIdf.nwrds`          -0.468
## `prdl.my.descr.fctriPadAir#0:D.ratio.sum.TfIdf.nwrds`           0.000
## `prdl.my.descr.fctriPadAir#1:D.ratio.sum.TfIdf.nwrds`          -0.463
## `prdl.my.descr.fctriPadmini 2+#0:D.ratio.sum.TfIdf.nwrds`       0.000
## `prdl.my.descr.fctriPadmini 2+#1:D.ratio.sum.TfIdf.nwrds`      -0.715
## `prdl.my.descr.fctriPadmini#0:D.ratio.sum.TfIdf.nwrds`          0.000
## `prdl.my.descr.fctriPadmini#1:D.ratio.sum.TfIdf.nwrds`          0.498
## `prdl.my.descr.fctrUnknown#1:D.TfIdf.sum.stem.stop.Ratio`       0.295
## `prdl.my.descr.fctriPad 1#0:D.TfIdf.sum.stem.stop.Ratio`       -0.134
## `prdl.my.descr.fctriPad 1#1:D.TfIdf.sum.stem.stop.Ratio`       -0.339
## `prdl.my.descr.fctriPad 2#0:D.TfIdf.sum.stem.stop.Ratio`        0.596
## `prdl.my.descr.fctriPad 2#1:D.TfIdf.sum.stem.stop.Ratio`        0.038
## `prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio`       0.372
## `prdl.my.descr.fctriPad 3+#1:D.TfIdf.sum.stem.stop.Ratio`      -0.001
## `prdl.my.descr.fctriPadAir#0:D.TfIdf.sum.stem.stop.Ratio`       0.023
## `prdl.my.descr.fctriPadAir#1:D.TfIdf.sum.stem.stop.Ratio`       0.351
## `prdl.my.descr.fctriPadmini 2+#0:D.TfIdf.sum.stem.stop.Ratio`  -0.160
## `prdl.my.descr.fctriPadmini 2+#1:D.TfIdf.sum.stem.stop.Ratio`  -0.218
## `prdl.my.descr.fctriPadmini#0:D.TfIdf.sum.stem.stop.Ratio`      0.119
## `prdl.my.descr.fctriPadmini#1:D.TfIdf.sum.stem.stop.Ratio`     -0.053
## `prdl.my.descr.fctrUnknown#1:D.npnct15.log`                     0.000
## `prdl.my.descr.fctriPad 1#0:D.npnct15.log`                      0.000
## `prdl.my.descr.fctriPad 1#1:D.npnct15.log`                      0.407
## `prdl.my.descr.fctriPad 2#0:D.npnct15.log`                      0.000
## `prdl.my.descr.fctriPad 2#1:D.npnct15.log`                      0.066
## `prdl.my.descr.fctriPad 3+#0:D.npnct15.log`                     0.000
## `prdl.my.descr.fctriPad 3+#1:D.npnct15.log`                    -0.382
## `prdl.my.descr.fctriPadAir#0:D.npnct15.log`                     0.000
## `prdl.my.descr.fctriPadAir#1:D.npnct15.log`                     0.015
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct15.log`                 0.000
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct15.log`                 0.000
## `prdl.my.descr.fctriPadmini#0:D.npnct15.log`                    0.000
## `prdl.my.descr.fctriPadmini#1:D.npnct15.log`                    0.000
## `prdl.my.descr.fctrUnknown#1:D.npnct03.log`                     0.000
## `prdl.my.descr.fctriPad 1#0:D.npnct03.log`                      0.000
## `prdl.my.descr.fctriPad 1#1:D.npnct03.log`                     -1.260
## `prdl.my.descr.fctriPad 2#0:D.npnct03.log`                      0.000
## `prdl.my.descr.fctriPad 2#1:D.npnct03.log`                     -1.812
## `prdl.my.descr.fctriPad 3+#0:D.npnct03.log`                     0.000
## `prdl.my.descr.fctriPad 3+#1:D.npnct03.log`                    -0.681
## `prdl.my.descr.fctriPadAir#0:D.npnct03.log`                     0.000
## `prdl.my.descr.fctriPadAir#1:D.npnct03.log`                     0.461
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct03.log`                 0.000
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct03.log`                 0.264
## `prdl.my.descr.fctriPadmini#0:D.npnct03.log`                    0.000
## `prdl.my.descr.fctriPadmini#1:D.npnct03.log`                   -0.567
## `prdl.my.descr.fctrUnknown#1:D.nwrds.log`                      -0.291
## `prdl.my.descr.fctriPad 1#0:D.nwrds.log`                        0.000
## `prdl.my.descr.fctriPad 1#1:D.nwrds.log`                        0.404
## `prdl.my.descr.fctriPad 2#0:D.nwrds.log`                        0.000
## `prdl.my.descr.fctriPad 2#1:D.nwrds.log`                       -0.147
## `prdl.my.descr.fctriPad 3+#0:D.nwrds.log`                       0.000
## `prdl.my.descr.fctriPad 3+#1:D.nwrds.log`                      -0.169
## `prdl.my.descr.fctriPadAir#0:D.nwrds.log`                       0.000
## `prdl.my.descr.fctriPadAir#1:D.nwrds.log`                      -0.173
## `prdl.my.descr.fctriPadmini 2+#0:D.nwrds.log`                   0.000
## `prdl.my.descr.fctriPadmini 2+#1:D.nwrds.log`                   0.345
## `prdl.my.descr.fctriPadmini#0:D.nwrds.log`                      0.000
## `prdl.my.descr.fctriPadmini#1:D.nwrds.log`                      0.149
## `prdl.my.descr.fctrUnknown#1:D.nchrs.log`                      -0.220
## `prdl.my.descr.fctriPad 1#0:D.nchrs.log`                        0.000
## `prdl.my.descr.fctriPad 1#1:D.nchrs.log`                       -0.063
## `prdl.my.descr.fctriPad 2#0:D.nchrs.log`                        0.000
## `prdl.my.descr.fctriPad 2#1:D.nchrs.log`                        0.165
## `prdl.my.descr.fctriPad 3+#0:D.nchrs.log`                       0.000
## `prdl.my.descr.fctriPad 3+#1:D.nchrs.log`                       0.062
## `prdl.my.descr.fctriPadAir#0:D.nchrs.log`                       0.000
## `prdl.my.descr.fctriPadAir#1:D.nchrs.log`                      -0.147
## `prdl.my.descr.fctriPadmini 2+#0:D.nchrs.log`                   0.000
## `prdl.my.descr.fctriPadmini 2+#1:D.nchrs.log`                   0.072
## `prdl.my.descr.fctriPadmini#0:D.nchrs.log`                      0.000
## `prdl.my.descr.fctriPadmini#1:D.nchrs.log`                     -0.157
## `startprice.diff:biddable`                                     -6.949
## `cellular.fctr1:carrier.fctrNone`                               0.000
## `cellular.fctrUnknown:carrier.fctrNone`                         0.000
## `cellular.fctr1:carrier.fctrOther`                              0.323
## `cellular.fctrUnknown:carrier.fctrOther`                        0.000
## `cellular.fctr1:carrier.fctrSprint`                             0.108
## `cellular.fctrUnknown:carrier.fctrSprint`                       0.000
## `cellular.fctr1:carrier.fctrT-Mobile`                          -0.191
## `cellular.fctrUnknown:carrier.fctrT-Mobile`                     0.000
## `cellular.fctr1:carrier.fctrUnknown`                           -0.064
## `cellular.fctrUnknown:carrier.fctrUnknown`                     -0.055
## `cellular.fctr1:carrier.fctrVerizon`                            0.180
## `cellular.fctrUnknown:carrier.fctrVerizon`                      0.000
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2`                  0.000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2`                  0.559
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2`                   0.000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2`                  -0.409
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2`                   0.000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2`                   0.201
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2`                  0.000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2`                  0.747
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2`                  0.000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2`                  0.019
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2`              0.000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2`             -0.164
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2`                 0.000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2`                 0.131
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3`                  0.000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3`                  0.329
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3`                   0.000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3`                  -0.309
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3`                   0.000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3`                  -0.241
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3`                  0.000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3`                  0.151
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3`                  0.000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3`                 -0.377
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3`              0.000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3`             -0.640
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3`                 0.000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3`                 0.749
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4`                  0.000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4`                  0.805
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4`                   0.000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4`                  -0.150
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4`                   0.000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4`                   0.946
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4`                  0.000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4`                 -0.949
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4`                  0.000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4`                 -1.076
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4`              0.000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4`             -0.163
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4`                 0.000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4`                 1.566
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5`                  0.000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5`                  0.000
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5`                   0.000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5`                   0.517
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5`                   0.000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5`                   0.998
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5`                  0.000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5`                  2.374
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5`                  0.000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5`                  0.052
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5`              0.000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5`              0.000
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5`                 0.000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5`                 1.164
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6`                  0.000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6`                  0.000
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6`                   0.000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6`                   0.000
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6`                   0.000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6`                  -0.765
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6`                  0.000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6`                  0.230
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6`                  0.000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6`                  0.000
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6`              0.000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6`              0.000
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6`                 0.000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6`                 0.226
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7`                  0.000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7`                  0.000
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7`                   0.000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7`                   0.000
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7`                   0.000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7`                   0.000
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7`                  0.000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7`                 -0.437
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7`                  0.000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7`                  0.000
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7`              0.000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7`              0.000
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7`                 0.000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7`                -0.148
##                                                               Pr(>|z|)    
## (Intercept)                                                    0.74723    
## D.ratio.nstopwrds.nwrds                                        0.40590    
## D.terms.n.stem.stop.Ratio                                      0.47605    
## D.npnct01.log                                                  0.76981    
## .rnorm                                                         0.88441    
## storage.fctr16                                                 0.88925    
## storage.fctr32                                                 0.82773    
## storage.fctr64                                                 0.20101    
## storage.fctrUnknown                                            0.61238    
## D.npnct11.log                                                  0.86465    
## D.npnct10.log                                                  0.48572    
## D.TfIdf.sum.post.stop                                          0.72999    
## D.TfIdf.sum.post.stem                                          0.74034    
## D.sum.TfIdf                                                    0.74034    
## `prdl.my.descr.fctrUnknown#1`                                  0.97398    
## `prdl.my.descr.fctriPad 1#0`                                   0.89304    
## `prdl.my.descr.fctriPad 1#1`                                   0.83680    
## `prdl.my.descr.fctriPad 2#0`                                   0.55131    
## `prdl.my.descr.fctriPad 2#1`                                   0.79781    
## `prdl.my.descr.fctriPad 3+#0`                                  0.70954    
## `prdl.my.descr.fctriPad 3+#1`                                  0.97627    
## `prdl.my.descr.fctriPadAir#0`                                  0.98126    
## `prdl.my.descr.fctriPadAir#1`                                  0.90678    
## `prdl.my.descr.fctriPadmini 2+#0`                              0.87300    
## `prdl.my.descr.fctriPadmini 2+#1`                              0.95133    
## `prdl.my.descr.fctriPadmini#0`                                 0.90493    
## `prdl.my.descr.fctriPadmini#1`                                 0.87441    
## D.npnct13.log                                                  0.35604    
## color.fctrGold                                                 0.86741    
## `color.fctrSpace Gray`                                         0.42399    
## color.fctrUnknown                                              0.24145    
## color.fctrWhite                                                0.79628    
## D.npnct08.log                                                  0.87048    
## D.npnct16.log                                                  0.47192    
## D.npnct24.log                                                  0.94270    
## D.nstopwrds.log                                                0.74740    
## D.npnct06.log                                                  0.14989    
## D.npnct28.log                                                  0.98670    
## D.nuppr.log                                                    0.84121    
## D.npnct12.log                                                  0.16463    
## D.npnct09.log                                                  0.76489    
## D.ndgts.log                                                    0.30213    
## D.nwrds.unq.log                                                0.80647    
## D.terms.n.post.stem.log                                        0.80647    
## D.terms.n.post.stop.log                                        0.80527    
## D.npnct14.log                                                  0.45789    
## D.terms.n.post.stem                                            0.76350    
## D.terms.n.post.stop                                            0.75478    
## D.npnct05.log                                                  0.08608 .  
## `condition.fctrFor parts or not working`                       0.73344    
## `condition.fctrManufacturer refurbished`                       0.48580    
## condition.fctrNew                                              0.24746    
## `condition.fctrNew other (see details)`                        0.24292    
## `condition.fctrSeller refurbished`                             0.34980    
## idseq.my                                                       0.57275    
## D.ratio.sum.TfIdf.nwrds                                        0.63231    
## D.TfIdf.sum.stem.stop.Ratio                                    0.84104    
## D.npnct15.log                                                  0.63913    
## D.npnct03.log                                                  0.39441    
## D.nwrds.log                                                    0.96890    
## D.nchrs.log                                                    0.91504    
## startprice.diff                                                0.00105 ** 
## biddable                                                       < 2e-16 ***
## cellular.fctr1                                                 0.97462    
## cellular.fctrUnknown                                           0.95595    
## carrier.fctrNone                                               0.92438    
## carrier.fctrOther                                              0.74700    
## carrier.fctrSprint                                             0.91372    
## `carrier.fctrT-Mobile`                                         0.84870    
## carrier.fctrUnknown                                            0.89861    
## carrier.fctrVerizon                                            0.85734    
## `prdl.my.descr.fctrUnknown#1:idseq.my`                         0.27126    
## `prdl.my.descr.fctriPad 1#0:idseq.my`                          0.39165    
## `prdl.my.descr.fctriPad 1#1:idseq.my`                          0.55742    
## `prdl.my.descr.fctriPad 2#0:idseq.my`                          0.51966    
## `prdl.my.descr.fctriPad 2#1:idseq.my`                          0.93040    
## `prdl.my.descr.fctriPad 3+#0:idseq.my`                         0.80614    
## `prdl.my.descr.fctriPad 3+#1:idseq.my`                         0.48406    
## `prdl.my.descr.fctriPadAir#0:idseq.my`                         0.71600    
## `prdl.my.descr.fctriPadAir#1:idseq.my`                         0.53988    
## `prdl.my.descr.fctriPadmini 2+#0:idseq.my`                     0.32903    
## `prdl.my.descr.fctriPadmini 2+#1:idseq.my`                     0.69809    
## `prdl.my.descr.fctriPadmini#0:idseq.my`                        0.93750    
## `prdl.my.descr.fctriPadmini#1:idseq.my`                        0.93645    
## `prdl.my.descr.fctrUnknown#1:D.ratio.sum.TfIdf.nwrds`          0.68141    
## `prdl.my.descr.fctriPad 1#0:D.ratio.sum.TfIdf.nwrds`           1.00000    
## `prdl.my.descr.fctriPad 1#1:D.ratio.sum.TfIdf.nwrds`           0.43967    
## `prdl.my.descr.fctriPad 2#0:D.ratio.sum.TfIdf.nwrds`           1.00000    
## `prdl.my.descr.fctriPad 2#1:D.ratio.sum.TfIdf.nwrds`           0.34093    
## `prdl.my.descr.fctriPad 3+#0:D.ratio.sum.TfIdf.nwrds`          1.00000    
## `prdl.my.descr.fctriPad 3+#1:D.ratio.sum.TfIdf.nwrds`          0.63967    
## `prdl.my.descr.fctriPadAir#0:D.ratio.sum.TfIdf.nwrds`          1.00000    
## `prdl.my.descr.fctriPadAir#1:D.ratio.sum.TfIdf.nwrds`          0.64346    
## `prdl.my.descr.fctriPadmini 2+#0:D.ratio.sum.TfIdf.nwrds`      1.00000    
## `prdl.my.descr.fctriPadmini 2+#1:D.ratio.sum.TfIdf.nwrds`      0.47475    
## `prdl.my.descr.fctriPadmini#0:D.ratio.sum.TfIdf.nwrds`         1.00000    
## `prdl.my.descr.fctriPadmini#1:D.ratio.sum.TfIdf.nwrds`         0.61871    
## `prdl.my.descr.fctrUnknown#1:D.TfIdf.sum.stem.stop.Ratio`      0.76809    
## `prdl.my.descr.fctriPad 1#0:D.TfIdf.sum.stem.stop.Ratio`       0.89304    
## `prdl.my.descr.fctriPad 1#1:D.TfIdf.sum.stem.stop.Ratio`       0.73458    
## `prdl.my.descr.fctriPad 2#0:D.TfIdf.sum.stem.stop.Ratio`       0.55131    
## `prdl.my.descr.fctriPad 2#1:D.TfIdf.sum.stem.stop.Ratio`       0.96967    
## `prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio`      0.70954    
## `prdl.my.descr.fctriPad 3+#1:D.TfIdf.sum.stem.stop.Ratio`      0.99906    
## `prdl.my.descr.fctriPadAir#0:D.TfIdf.sum.stem.stop.Ratio`      0.98126    
## `prdl.my.descr.fctriPadAir#1:D.TfIdf.sum.stem.stop.Ratio`      0.72596    
## `prdl.my.descr.fctriPadmini 2+#0:D.TfIdf.sum.stem.stop.Ratio`  0.87300    
## `prdl.my.descr.fctriPadmini 2+#1:D.TfIdf.sum.stem.stop.Ratio`  0.82758    
## `prdl.my.descr.fctriPadmini#0:D.TfIdf.sum.stem.stop.Ratio`     0.90493    
## `prdl.my.descr.fctriPadmini#1:D.TfIdf.sum.stem.stop.Ratio`     0.95777    
## `prdl.my.descr.fctrUnknown#1:D.npnct15.log`                    1.00000    
## `prdl.my.descr.fctriPad 1#0:D.npnct15.log`                     1.00000    
## `prdl.my.descr.fctriPad 1#1:D.npnct15.log`                     0.68377    
## `prdl.my.descr.fctriPad 2#0:D.npnct15.log`                     1.00000    
## `prdl.my.descr.fctriPad 2#1:D.npnct15.log`                     0.94744    
## `prdl.my.descr.fctriPad 3+#0:D.npnct15.log`                    1.00000    
## `prdl.my.descr.fctriPad 3+#1:D.npnct15.log`                    0.70271    
## `prdl.my.descr.fctriPadAir#0:D.npnct15.log`                    1.00000    
## `prdl.my.descr.fctriPadAir#1:D.npnct15.log`                    0.98780    
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct15.log`                1.00000    
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct15.log`                1.00000    
## `prdl.my.descr.fctriPadmini#0:D.npnct15.log`                   1.00000    
## `prdl.my.descr.fctriPadmini#1:D.npnct15.log`                   1.00000    
## `prdl.my.descr.fctrUnknown#1:D.npnct03.log`                    1.00000    
## `prdl.my.descr.fctriPad 1#0:D.npnct03.log`                     1.00000    
## `prdl.my.descr.fctriPad 1#1:D.npnct03.log`                     0.20779    
## `prdl.my.descr.fctriPad 2#0:D.npnct03.log`                     1.00000    
## `prdl.my.descr.fctriPad 2#1:D.npnct03.log`                     0.06999 .  
## `prdl.my.descr.fctriPad 3+#0:D.npnct03.log`                    1.00000    
## `prdl.my.descr.fctriPad 3+#1:D.npnct03.log`                    0.49563    
## `prdl.my.descr.fctriPadAir#0:D.npnct03.log`                    1.00000    
## `prdl.my.descr.fctriPadAir#1:D.npnct03.log`                    0.64495    
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct03.log`                1.00000    
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct03.log`                0.79182    
## `prdl.my.descr.fctriPadmini#0:D.npnct03.log`                   1.00000    
## `prdl.my.descr.fctriPadmini#1:D.npnct03.log`                   0.57090    
## `prdl.my.descr.fctrUnknown#1:D.nwrds.log`                      0.77078    
## `prdl.my.descr.fctriPad 1#0:D.nwrds.log`                       1.00000    
## `prdl.my.descr.fctriPad 1#1:D.nwrds.log`                       0.68627    
## `prdl.my.descr.fctriPad 2#0:D.nwrds.log`                       1.00000    
## `prdl.my.descr.fctriPad 2#1:D.nwrds.log`                       0.88300    
## `prdl.my.descr.fctriPad 3+#0:D.nwrds.log`                      1.00000    
## `prdl.my.descr.fctriPad 3+#1:D.nwrds.log`                      0.86580    
## `prdl.my.descr.fctriPadAir#0:D.nwrds.log`                      1.00000    
## `prdl.my.descr.fctriPadAir#1:D.nwrds.log`                      0.86252    
## `prdl.my.descr.fctriPadmini 2+#0:D.nwrds.log`                  1.00000    
## `prdl.my.descr.fctriPadmini 2+#1:D.nwrds.log`                  0.73039    
## `prdl.my.descr.fctriPadmini#0:D.nwrds.log`                     1.00000    
## `prdl.my.descr.fctriPadmini#1:D.nwrds.log`                     0.88187    
## `prdl.my.descr.fctrUnknown#1:D.nchrs.log`                      0.82601    
## `prdl.my.descr.fctriPad 1#0:D.nchrs.log`                       1.00000    
## `prdl.my.descr.fctriPad 1#1:D.nchrs.log`                       0.94996    
## `prdl.my.descr.fctriPad 2#0:D.nchrs.log`                       1.00000    
## `prdl.my.descr.fctriPad 2#1:D.nchrs.log`                       0.86870    
## `prdl.my.descr.fctriPad 3+#0:D.nchrs.log`                      1.00000    
## `prdl.my.descr.fctriPad 3+#1:D.nchrs.log`                      0.95069    
## `prdl.my.descr.fctriPadAir#0:D.nchrs.log`                      1.00000    
## `prdl.my.descr.fctriPadAir#1:D.nchrs.log`                      0.88295    
## `prdl.my.descr.fctriPadmini 2+#0:D.nchrs.log`                  1.00000    
## `prdl.my.descr.fctriPadmini 2+#1:D.nchrs.log`                  0.94237    
## `prdl.my.descr.fctriPadmini#0:D.nchrs.log`                     1.00000    
## `prdl.my.descr.fctriPadmini#1:D.nchrs.log`                     0.87563    
## `startprice.diff:biddable`                                    3.68e-12 ***
## `cellular.fctr1:carrier.fctrNone`                              1.00000    
## `cellular.fctrUnknown:carrier.fctrNone`                        1.00000    
## `cellular.fctr1:carrier.fctrOther`                             0.74700    
## `cellular.fctrUnknown:carrier.fctrOther`                       1.00000    
## `cellular.fctr1:carrier.fctrSprint`                            0.91372    
## `cellular.fctrUnknown:carrier.fctrSprint`                      1.00000    
## `cellular.fctr1:carrier.fctrT-Mobile`                          0.84870    
## `cellular.fctrUnknown:carrier.fctrT-Mobile`                    1.00000    
## `cellular.fctr1:carrier.fctrUnknown`                           0.94877    
## `cellular.fctrUnknown:carrier.fctrUnknown`                     0.95595    
## `cellular.fctr1:carrier.fctrVerizon`                           0.85734    
## `cellular.fctrUnknown:carrier.fctrVerizon`                     1.00000    
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2`                 1.00000    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2`                 0.57583    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2`                  1.00000    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2`                  0.68243    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2`                  1.00000    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2`                  0.84031    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2`                 1.00000    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2`                 0.45487    
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2`                 1.00000    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2`                 0.98512    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2`             1.00000    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2`             0.86940    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2`                1.00000    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2`                0.89599    
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3`                 1.00000    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3`                 0.74184    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3`                  1.00000    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3`                  0.75725    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3`                  1.00000    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3`                  0.80950    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3`                 1.00000    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3`                 0.88015    
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3`                 1.00000    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3`                 0.70636    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3`             1.00000    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3`             0.52247    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3`                1.00000    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3`                0.45359    
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4`                 1.00000    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4`                 0.42093    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4`                  1.00000    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4`                  0.88050    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4`                  1.00000    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4`                  0.34396    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4`                 1.00000    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4`                 0.34256    
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4`                 1.00000    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4`                 0.28199    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4`             1.00000    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4`             0.87061    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4`                1.00000    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4`                0.11726    
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5`                 1.00000    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5`                 1.00000    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5`                  1.00000    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5`                  0.60521    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5`                  1.00000    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5`                  0.31847    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5`                 1.00000    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5`                 0.01757 *  
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5`                 1.00000    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5`                 0.95860    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5`             1.00000    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5`             1.00000    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5`                1.00000    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5`                0.24430    
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6`                 1.00000    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6`                 1.00000    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6`                  1.00000    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6`                  1.00000    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6`                  1.00000    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6`                  0.44435    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6`                 1.00000    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6`                 0.81802    
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6`                 1.00000    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6`                 1.00000    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6`             1.00000    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6`             1.00000    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6`                1.00000    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6`                0.82097    
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7`                 1.00000    
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7`                 1.00000    
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7`                  1.00000    
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7`                  1.00000    
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7`                  1.00000    
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7`                  1.00000    
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7`                 1.00000    
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7`                 0.66243    
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7`                 1.00000    
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7`                 1.00000    
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7`             1.00000    
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7`             1.00000    
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7`                1.00000    
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7`                0.88236    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1337.51  on 968  degrees of freedom
## Residual deviance:  646.38  on 710  degrees of freedom
## AIC: 1164.4
## 
## Number of Fisher Scoring iterations: 20
## 
## [1] "    calling mypredict_mdl for fit:"

##    threshold   f.score
## 1        0.0 0.6313559
## 2        0.1 0.7112903
## 3        0.2 0.8026949
## 4        0.3 0.8430108
## 5        0.4 0.8505747
## 6        0.5 0.8397129
## 7        0.6 0.8296296
## 8        0.7 0.8178344
## 9        0.8 0.7911227
## 10       0.9 0.7170868
## 11       1.0 0.0000000

## [1] "Classifier Probability Threshold: 0.4000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.All.Interact.X.bayesglm.N
## 1         N                                         469
## 2         Y                                          77
##   sold.fctr.predict.All.Interact.X.bayesglm.Y
## 1                                          53
## 2                                         370
##          Prediction
## Reference   N   Y
##         N 469  53
##         Y  77 370
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.658411e-01   7.290199e-01   8.427522e-01   8.866791e-01   5.386997e-01 
## AccuracyPValue  McnemarPValue 
##  3.005592e-105   4.367116e-02 
## [1] "    calling mypredict_mdl for OOB:"

##    threshold   f.score
## 1        0.0 0.6339217
## 2        0.1 0.7027027
## 3        0.2 0.7317073
## 4        0.3 0.7519819
## 5        0.4 0.7768396
## 6        0.5 0.7949367
## 7        0.6 0.7885117
## 8        0.7 0.7783784
## 9        0.8 0.7612360
## 10       0.9 0.7000000
## 11       1.0 0.0000000

## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.OOB"
##   sold.fctr sold.fctr.predict.All.Interact.X.bayesglm.N
## 1         N                                         414
## 2         Y                                          99
##   sold.fctr.predict.All.Interact.X.bayesglm.Y
## 1                                          63
## 2                                         314
##          Prediction
## Reference   N   Y
##         N 414  63
##         Y  99 314
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.179775e-01   6.319103e-01   7.910266e-01   8.428037e-01   5.359551e-01 
## AccuracyPValue  McnemarPValue 
##   6.921274e-70   5.962079e-03 
##                  model_id model_method
## 1 All.Interact.X.bayesglm     bayesglm
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   feats
## 1 D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
##   max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1               1                      6.633                 2.293
##   max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1   0.9286388                    0.4       0.8505747        0.7925697
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1             0.8427522             0.8866791      0.580097   0.8660362
##   opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1                    0.5       0.7949367        0.8179775
##   max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB min.aic.fit
## 1             0.7910266             0.8428037     0.6319103    1164.383
##   max.AccuracySD.fit max.KappaSD.fit
## 1         0.03516352      0.07034406
##                   label step_major step_minor     bgn     end elapsed
## 8 fit.models_1_bayesglm          8          0 157.328 167.609  10.281
## 9   fit.models_1_glmnet          9          0 167.610      NA      NA
## [1] "fitting model: All.Interact.X.glmnet"
## [1] "    indep_vars: D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr"
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.55, lambda = 0.0559 on full training set
## Warning in myfit_mdl(model_id = model_id, model_method = method,
## indep_vars_vctr = indep_vars_vctr, : model's bestTune found at an extreme
## of tuneGrid for parameter: lambda

##             Length Class      Mode     
## a0             93  -none-     numeric  
## beta        23994  dgCMatrix  S4       
## df             93  -none-     numeric  
## dim             2  -none-     numeric  
## lambda         93  -none-     numeric  
## dev.ratio      93  -none-     numeric  
## nulldev         1  -none-     numeric  
## npasses         1  -none-     numeric  
## jerr            1  -none-     numeric  
## offset          1  -none-     logical  
## classnames      2  -none-     character
## call            5  -none-     call     
## nobs            1  -none-     numeric  
## lambdaOpt       1  -none-     numeric  
## xNames        258  -none-     character
## problemType     1  -none-     character
## tuneValue       2  data.frame list     
## obsLevels       2  -none-     character
## [1] "min lambda > lambdaOpt:"
##                                             (Intercept) 
##                                           -0.9197893866 
##                              prdl.my.descr.fctriPad 2#0 
##                                            0.1124207463 
##                             prdl.my.descr.fctriPad 3+#0 
##                                            0.0286697832 
##                                     D.terms.n.post.stem 
##                                           -0.0003862821 
##                                     D.terms.n.post.stop 
##                                           -0.0056444603 
##                                           D.npnct05.log 
##                                           -0.3320068978 
##                                       condition.fctrNew 
##                                           -0.0856349394 
##                                         startprice.diff 
##                                           -0.0026676704 
##                                                biddable 
##                                            2.1997091789 
##  prdl.my.descr.fctriPad 2#0:D.TfIdf.sum.stem.stop.Ratio 
##                                            0.1087054174 
## prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio 
##                                            0.0260983081 
##                prdl.my.descr.fctriPad 1#1:D.npnct15.log 
##                                            0.4788520325 
##                                startprice.diff:biddable 
##                                           -0.0077048034 
## [1] "max lambda < lambdaOpt:"
##                                                 (Intercept) 
##                                                3.505430e+00 
##                                     D.ratio.nstopwrds.nwrds 
##                                               -7.485809e+00 
##                                   D.terms.n.stem.stop.Ratio 
##                                                3.890518e+00 
##                                               D.npnct01.log 
##                                               -4.947610e-01 
##                                                      .rnorm 
##                                                1.444546e-02 
##                                              storage.fctr16 
##                                               -1.822934e-02 
##                                              storage.fctr32 
##                                                2.306846e-01 
##                                              storage.fctr64 
##                                                7.253402e-01 
##                                         storage.fctrUnknown 
##                                                3.418000e-01 
##                                               D.npnct11.log 
##                                               -4.513656e-02 
##                                               D.npnct10.log 
##                                               -1.059943e+00 
##                                       D.TfIdf.sum.post.stop 
##                                                1.508231e-01 
##                                       D.TfIdf.sum.post.stem 
##                                                1.454770e-01 
##                                                 D.sum.TfIdf 
##                                                1.118016e-01 
##                                 prdl.my.descr.fctrUnknown#1 
##                                               -1.146050e-01 
##                                  prdl.my.descr.fctriPad 1#0 
##                                               -2.712859e-01 
##                                  prdl.my.descr.fctriPad 1#1 
##                                               -6.344868e+00 
##                                  prdl.my.descr.fctriPad 2#0 
##                                                1.446729e+00 
##                                  prdl.my.descr.fctriPad 2#1 
##                                                6.829306e+00 
##                                 prdl.my.descr.fctriPad 3+#0 
##                                                5.985366e-01 
##                                 prdl.my.descr.fctriPad 3+#1 
##                                               -3.607925e-01 
##                                 prdl.my.descr.fctriPadAir#0 
##                                                3.721328e-02 
##                                 prdl.my.descr.fctriPadAir#1 
##                                                8.712179e-01 
##                             prdl.my.descr.fctriPadmini 2+#0 
##                                               -2.330970e-01 
##                             prdl.my.descr.fctriPadmini 2+#1 
##                                                3.464816e+00 
##                                prdl.my.descr.fctriPadmini#0 
##                                                1.511328e-01 
##                                prdl.my.descr.fctriPadmini#1 
##                                               -4.872842e+00 
##                                               D.npnct13.log 
##                                               -5.743303e-01 
##                                              color.fctrGold 
##                                               -1.754049e-01 
##                                        color.fctrSpace Gray 
##                                               -3.258811e-01 
##                                           color.fctrUnknown 
##                                               -3.811892e-01 
##                                             color.fctrWhite 
##                                               -8.173930e-02 
##                                               D.npnct08.log 
##                                               -1.620891e-01 
##                                               D.npnct16.log 
##                                                2.466960e+00 
##                                             D.nstopwrds.log 
##                                                1.695590e+00 
##                                               D.npnct06.log 
##                                               -4.930549e+00 
##                                                 D.nuppr.log 
##                                               -4.584746e-01 
##                                               D.npnct12.log 
##                                               -1.209235e+00 
##                                               D.npnct09.log 
##                                               -2.675262e+00 
##                                                 D.ndgts.log 
##                                                3.918722e-01 
##                                             D.nwrds.unq.log 
##                                               -8.412111e-01 
##                                     D.terms.n.post.stem.log 
##                                               -6.302781e-01 
##                                     D.terms.n.post.stop.log 
##                                               -4.619317e-01 
##                                               D.npnct14.log 
##                                               -5.909816e-01 
##                                         D.terms.n.post.stem 
##                                               -1.921138e-03 
##                                         D.terms.n.post.stop 
##                                               -5.057612e-02 
##                                               D.npnct05.log 
##                                               -3.556868e+00 
##                      condition.fctrFor parts or not working 
##                                                1.944876e-01 
##                      condition.fctrManufacturer refurbished 
##                                                3.955795e-01 
##                                           condition.fctrNew 
##                                               -3.672162e-01 
##                       condition.fctrNew other (see details) 
##                                                6.057331e-01 
##                            condition.fctrSeller refurbished 
##                                               -5.083360e-01 
##                                                    idseq.my 
##                                               -3.522800e-04 
##                                     D.ratio.sum.TfIdf.nwrds 
##                                               -9.770894e-01 
##                                 D.TfIdf.sum.stem.stop.Ratio 
##                                               -1.139681e+00 
##                                               D.npnct15.log 
##                                                4.670504e-01 
##                                                 D.nwrds.log 
##                                               -4.159284e-01 
##                                                 D.nchrs.log 
##                                               -1.421896e-01 
##                                             startprice.diff 
##                                               -4.904189e-03 
##                                                    biddable 
##                                                4.268450e+00 
##                                        cellular.fctrUnknown 
##                                               -4.562886e-05 
##                                            carrier.fctrNone 
##                                                1.951305e-01 
##                                           carrier.fctrOther 
##                                                4.731453e+00 
##                                          carrier.fctrSprint 
##                                                2.250031e-01 
##                                        carrier.fctrT-Mobile 
##                                               -4.696237e-01 
##                                         carrier.fctrUnknown 
##                                               -2.365238e-01 
##                                         carrier.fctrVerizon 
##                                                1.912790e-01 
##                        prdl.my.descr.fctrUnknown#1:idseq.my 
##                                                1.450785e-03 
##                         prdl.my.descr.fctriPad 1#0:idseq.my 
##                                                8.426993e-04 
##                         prdl.my.descr.fctriPad 1#1:idseq.my 
##                                                5.980170e-04 
##                         prdl.my.descr.fctriPad 2#0:idseq.my 
##                                               -1.308800e-03 
##                         prdl.my.descr.fctriPad 2#1:idseq.my 
##                                                1.008940e-04 
##                        prdl.my.descr.fctriPad 3+#0:idseq.my 
##                                               -2.091481e-04 
##                        prdl.my.descr.fctriPad 3+#1:idseq.my 
##                                                5.650835e-04 
##                        prdl.my.descr.fctriPadAir#0:idseq.my 
##                                                3.101896e-04 
##                        prdl.my.descr.fctriPadAir#1:idseq.my 
##                                                5.957307e-04 
##                    prdl.my.descr.fctriPadmini 2+#0:idseq.my 
##                                                8.384266e-04 
##                    prdl.my.descr.fctriPadmini 2+#1:idseq.my 
##                                                6.904061e-04 
##                       prdl.my.descr.fctriPadmini#0:idseq.my 
##                                                1.024183e-04 
##         prdl.my.descr.fctrUnknown#1:D.ratio.sum.TfIdf.nwrds 
##                                               -5.953414e-01 
##          prdl.my.descr.fctriPad 1#1:D.ratio.sum.TfIdf.nwrds 
##                                                3.470455e+00 
##          prdl.my.descr.fctriPad 2#1:D.ratio.sum.TfIdf.nwrds 
##                                               -2.333921e+00 
##         prdl.my.descr.fctriPad 3+#1:D.ratio.sum.TfIdf.nwrds 
##                                               -1.156997e+00 
##         prdl.my.descr.fctriPadAir#1:D.ratio.sum.TfIdf.nwrds 
##                                               -1.370894e+00 
##     prdl.my.descr.fctriPadmini 2+#1:D.ratio.sum.TfIdf.nwrds 
##                                               -4.856173e+00 
##        prdl.my.descr.fctriPadmini#1:D.ratio.sum.TfIdf.nwrds 
##                                                2.611292e+00 
##     prdl.my.descr.fctrUnknown#1:D.TfIdf.sum.stem.stop.Ratio 
##                                                1.430409e+00 
##      prdl.my.descr.fctriPad 1#0:D.TfIdf.sum.stem.stop.Ratio 
##                                               -2.635255e-01 
##      prdl.my.descr.fctriPad 1#1:D.TfIdf.sum.stem.stop.Ratio 
##                                               -7.282856e-01 
##      prdl.my.descr.fctriPad 2#0:D.TfIdf.sum.stem.stop.Ratio 
##                                                1.428462e+00 
##      prdl.my.descr.fctriPad 2#1:D.TfIdf.sum.stem.stop.Ratio 
##                                               -4.745476e+00 
##     prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio 
##                                                6.248226e-01 
##     prdl.my.descr.fctriPad 3+#1:D.TfIdf.sum.stem.stop.Ratio 
##                                                1.008090e+00 
##     prdl.my.descr.fctriPadAir#0:D.TfIdf.sum.stem.stop.Ratio 
##                                                4.689379e-02 
##     prdl.my.descr.fctriPadAir#1:D.TfIdf.sum.stem.stop.Ratio 
##                                                3.369365e+00 
## prdl.my.descr.fctriPadmini 2+#0:D.TfIdf.sum.stem.stop.Ratio 
##                                               -3.230604e-01 
## prdl.my.descr.fctriPadmini 2+#1:D.TfIdf.sum.stem.stop.Ratio 
##                                               -2.605974e+00 
##    prdl.my.descr.fctriPadmini#0:D.TfIdf.sum.stem.stop.Ratio 
##                                                2.093205e-01 
##    prdl.my.descr.fctriPadmini#1:D.TfIdf.sum.stem.stop.Ratio 
##                                               -2.725530e-04 
##                    prdl.my.descr.fctriPad 1#1:D.npnct15.log 
##                                                7.631379e+00 
##                    prdl.my.descr.fctriPad 2#1:D.npnct15.log 
##                                                6.449691e+00 
##                   prdl.my.descr.fctriPadAir#1:D.npnct15.log 
##                                                3.450945e+00 
##                    prdl.my.descr.fctriPad 1#1:D.npnct03.log 
##                                               -1.496541e+00 
##                    prdl.my.descr.fctriPad 2#1:D.npnct03.log 
##                                               -3.487919e+00 
##                   prdl.my.descr.fctriPad 3+#1:D.npnct03.log 
##                                               -5.816703e+00 
##                   prdl.my.descr.fctriPadAir#1:D.npnct03.log 
##                                                5.543516e+00 
##               prdl.my.descr.fctriPadmini 2+#1:D.npnct03.log 
##                                                9.796775e+00 
##                  prdl.my.descr.fctriPadmini#1:D.npnct03.log 
##                                                7.826695e-01 
##                     prdl.my.descr.fctrUnknown#1:D.nwrds.log 
##                                               -2.545165e-01 
##                      prdl.my.descr.fctriPad 1#1:D.nwrds.log 
##                                                2.056715e+00 
##                      prdl.my.descr.fctriPad 2#1:D.nwrds.log 
##                                               -8.771649e-01 
##                     prdl.my.descr.fctriPad 3+#1:D.nwrds.log 
##                                               -8.962667e-01 
##                     prdl.my.descr.fctriPadAir#1:D.nwrds.log 
##                                               -9.885393e-01 
##                 prdl.my.descr.fctriPadmini 2+#1:D.nwrds.log 
##                                                1.897646e+00 
##                    prdl.my.descr.fctriPadmini#1:D.nwrds.log 
##                                                3.966039e-01 
##                     prdl.my.descr.fctrUnknown#1:D.nchrs.log 
##                                               -6.771936e-01 
##                      prdl.my.descr.fctriPad 1#1:D.nchrs.log 
##                                               -1.868344e-01 
##                      prdl.my.descr.fctriPad 2#1:D.nchrs.log 
##                                                1.315852e-02 
##                     prdl.my.descr.fctriPad 3+#1:D.nchrs.log 
##                                                1.185281e-01 
##                     prdl.my.descr.fctriPadAir#1:D.nchrs.log 
##                                               -3.870701e-01 
##                 prdl.my.descr.fctriPadmini 2+#1:D.nchrs.log 
##                                               -9.696981e-01 
##                                    startprice.diff:biddable 
##                                               -3.663213e-02 
##                            cellular.fctr1:carrier.fctrOther 
##                                                4.563711e+00 
##                           cellular.fctr1:carrier.fctrSprint 
##                                                1.189507e-01 
##                         cellular.fctr1:carrier.fctrT-Mobile 
##                                               -2.444122e-01 
##                    cellular.fctrUnknown:carrier.fctrUnknown 
##                                               -2.611744e-02 
##                          cellular.fctr1:carrier.fctrVerizon 
##                                                3.135925e-01 
##                prdl.my.descr.fctrUnknown#1:.clusterid.fctr2 
##                                                9.753772e-01 
##                 prdl.my.descr.fctriPad 1#1:.clusterid.fctr2 
##                                               -5.196912e-01 
##                 prdl.my.descr.fctriPad 2#1:.clusterid.fctr2 
##                                                5.806539e-01 
##                prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2 
##                                                9.373133e-01 
##                prdl.my.descr.fctriPadAir#1:.clusterid.fctr2 
##                                               -1.945250e-01 
##            prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2 
##                                               -1.011157e+00 
##               prdl.my.descr.fctriPadmini#1:.clusterid.fctr2 
##                                                1.756957e+00 
##                prdl.my.descr.fctrUnknown#1:.clusterid.fctr3 
##                                                8.747031e-01 
##                 prdl.my.descr.fctriPad 1#1:.clusterid.fctr3 
##                                               -8.707494e-02 
##                 prdl.my.descr.fctriPad 2#1:.clusterid.fctr3 
##                                                4.528562e-02 
##                prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3 
##                                                4.783447e-01 
##                prdl.my.descr.fctriPadAir#1:.clusterid.fctr3 
##                                               -7.100320e-01 
##            prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3 
##                                               -2.575884e+00 
##               prdl.my.descr.fctriPadmini#1:.clusterid.fctr3 
##                                                2.454934e+00 
##                prdl.my.descr.fctrUnknown#1:.clusterid.fctr4 
##                                                6.315811e+00 
##                 prdl.my.descr.fctriPad 1#1:.clusterid.fctr4 
##                                               -3.422748e-01 
##                 prdl.my.descr.fctriPad 2#1:.clusterid.fctr4 
##                                                1.544226e+00 
##                prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4 
##                                               -5.080517e+00 
##                prdl.my.descr.fctriPadAir#1:.clusterid.fctr4 
##                                               -2.005145e+00 
##            prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4 
##                                               -3.958597e+00 
##               prdl.my.descr.fctriPadmini#1:.clusterid.fctr4 
##                                                8.987295e+00 
##                 prdl.my.descr.fctriPad 1#1:.clusterid.fctr5 
##                                                4.350755e+00 
##                 prdl.my.descr.fctriPad 2#1:.clusterid.fctr5 
##                                                2.245772e+00 
##                prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5 
##                                                2.446895e+00 
##                prdl.my.descr.fctriPadAir#1:.clusterid.fctr5 
##                                               -3.715727e+00 
##               prdl.my.descr.fctriPadmini#1:.clusterid.fctr5 
##                                                2.909860e+00 
##                 prdl.my.descr.fctriPad 2#1:.clusterid.fctr6 
##                                               -1.253413e+00 
##                prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6 
##                                                5.786273e-01 
##               prdl.my.descr.fctriPadmini#1:.clusterid.fctr6 
##                                                1.885894e+00 
##                prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7 
##                                               -4.377331e+00 
##               prdl.my.descr.fctriPadmini#1:.clusterid.fctr7 
##                                                8.571200e-01 
## character(0)
## character(0)
## [1] "    calling mypredict_mdl for fit:"

##    threshold     f.score
## 1        0.0 0.631355932
## 2        0.1 0.638115632
## 3        0.2 0.667664671
## 4        0.3 0.737279335
## 5        0.4 0.794780546
## 6        0.5 0.804848485
## 7        0.6 0.806451613
## 8        0.7 0.791180285
## 9        0.8 0.466216216
## 10       0.9 0.004464286
## 11       1.0 0.000000000

## [1] "Classifier Probability Threshold: 0.6000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.All.Interact.X.glmnet.N
## 1         N                                       488
## 2         Y                                       122
##   sold.fctr.predict.All.Interact.X.glmnet.Y
## 1                                        34
## 2                                       325
##          Prediction
## Reference   N   Y
##         N 488  34
##         Y 122 325
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.390093e-01   6.714312e-01   8.143296e-01   8.616078e-01   5.386997e-01 
## AccuracyPValue  McnemarPValue 
##   3.427839e-87   3.270658e-12 
## [1] "    calling mypredict_mdl for OOB:"

##    threshold    f.score
## 1        0.0 0.63392172
## 2        0.1 0.63783784
## 3        0.2 0.66345382
## 4        0.3 0.73029967
## 5        0.4 0.78526048
## 6        0.5 0.79636835
## 7        0.6 0.80585106
## 8        0.7 0.80497925
## 9        0.8 0.48638838
## 10       0.9 0.01438849
## 11       1.0 0.00000000

## [1] "Classifier Probability Threshold: 0.6000 to maximize f.score.OOB"
##   sold.fctr sold.fctr.predict.All.Interact.X.glmnet.N
## 1         N                                       441
## 2         Y                                       110
##   sold.fctr.predict.All.Interact.X.glmnet.Y
## 1                                        36
## 2                                       303
##          Prediction
## Reference   N   Y
##         N 441  36
##         Y 110 303
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.359551e-01   6.661923e-01   8.099608e-01   8.597019e-01   5.359551e-01 
## AccuracyPValue  McnemarPValue 
##   9.313992e-80   1.526663e-09 
##                model_id model_method
## 1 All.Interact.X.glmnet       glmnet
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   feats
## 1 D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
##   max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1               9                     14.868                 1.855
##   max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1   0.8822461                    0.6       0.8064516        0.8307534
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1             0.8143296             0.8616078     0.6555776   0.8742088
##   opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1                    0.6       0.8058511        0.8359551
##   max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1             0.8099608             0.8597019     0.6661923
##   max.AccuracySD.fit max.KappaSD.fit
## 1         0.02792105      0.05688346
##                  label step_major step_minor     bgn     end elapsed
## 9  fit.models_1_glmnet          9          0 167.610 186.666  19.057
## 10  fit.models_1_rpart         10          0 186.667      NA      NA
## [1] "fitting model: All.Interact.X.no.rnorm.rpart"
## [1] "    indep_vars: D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr"
## Aggregating results
## Selecting tuning parameters
## Fitting cp = 0.00671 on full training set
## Warning in myfit_mdl(model_id = model_id, model_method = method,
## indep_vars_vctr = indep_vars_vctr, : model's bestTune found at an extreme
## of tuneGrid for parameter: cp

## Call:
## rpart(formula = .outcome ~ ., control = list(minsplit = 20, minbucket = 7, 
##     cp = 0, maxcompete = 4, maxsurrogate = 5, usesurrogate = 2, 
##     surrogatestyle = 0, maxdepth = 30, xval = 0))
##   n= 969 
## 
##            CP nsplit rel error
## 1 0.527964206      0 1.0000000
## 2 0.134228188      1 0.4720358
## 3 0.006711409      2 0.3378076
## 
## Variable importance
##                                    startprice.diff:biddable 
##                                                          36 
##                                                    biddable 
##                                                          32 
##                                             startprice.diff 
##                                                          17 
##                                                    idseq.my 
##                                                           5 
##                        prdl.my.descr.fctriPad 3+#0:idseq.my 
##                                                           2 
##                                 prdl.my.descr.fctriPad 3+#0 
##                                                           2 
##     prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio 
##                                                           2 
##                    prdl.my.descr.fctriPadmini 2+#0:idseq.my 
##                                                           1 
##                        prdl.my.descr.fctriPad 3+#1:idseq.my 
##                                                           1 
## prdl.my.descr.fctriPadmini 2+#1:D.TfIdf.sum.stem.stop.Ratio 
##                                                           1 
##                    prdl.my.descr.fctriPadmini 2+#1:idseq.my 
##                                                           1 
## 
## Node number 1: 969 observations,    complexity param=0.5279642
##   predicted class=N  expected loss=0.4613003  P(node) =1
##     class counts:   522   447
##    probabilities: 0.539 0.461 
##   left son=2 (539 obs) right son=3 (430 obs)
##   Primary splits:
##       biddable                 < 0.5         to the left,  improve=151.58290, (0 missing)
##       startprice.diff:biddable < -0.01904429 to the right, improve=101.31790, (0 missing)
##       startprice.diff          < 62.89456    to the right, improve= 82.96307, (0 missing)
##       idseq.my                 < 876.5       to the right, improve= 37.84375, (0 missing)
##       condition.fctrNew        < 0.5         to the right, improve= 16.22579, (0 missing)
##   Surrogate splits:
##       startprice.diff:biddable                                < 0.01885349  to the left,  agree=0.822, adj=0.600, (0 split)
##       idseq.my                                                < 798         to the right, agree=0.628, adj=0.163, (0 split)
##       prdl.my.descr.fctriPad 3+#0:idseq.my                    < 75          to the left,  agree=0.589, adj=0.074, (0 split)
##       prdl.my.descr.fctriPad 3+#0                             < 0.5         to the left,  agree=0.586, adj=0.067, (0 split)
##       prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio < 0.5         to the left,  agree=0.586, adj=0.067, (0 split)
## 
## Node number 2: 539 observations
##   predicted class=N  expected loss=0.2115028  P(node) =0.5562436
##     class counts:   425   114
##    probabilities: 0.788 0.212 
## 
## Node number 3: 430 observations,    complexity param=0.1342282
##   predicted class=Y  expected loss=0.2255814  P(node) =0.4437564
##     class counts:    97   333
##    probabilities: 0.226 0.774 
##   left son=6 (80 obs) right son=7 (350 obs)
##   Primary splits:
##       startprice.diff                       < 63.51092    to the right, improve=82.902920, (0 missing)
##       startprice.diff:biddable              < 63.51092    to the right, improve=82.902920, (0 missing)
##       idseq.my                              < 893.5       to the right, improve=15.999440, (0 missing)
##       prdl.my.descr.fctriPad 3+#1:idseq.my  < 898.5       to the right, improve= 3.809785, (0 missing)
##       prdl.my.descr.fctriPadmini#0:idseq.my < 878.5       to the right, improve= 3.595642, (0 missing)
##   Surrogate splits:
##       startprice.diff:biddable                                    < 63.51092    to the right, agree=1.000, adj=1.000, (0 split)
##       prdl.my.descr.fctriPadmini 2+#0:idseq.my                    < 1420        to the right, agree=0.826, adj=0.063, (0 split)
##       prdl.my.descr.fctriPad 3+#1:idseq.my                        < 898.5       to the right, agree=0.821, adj=0.038, (0 split)
##       prdl.my.descr.fctriPadmini 2+#1:idseq.my                    < 776         to the right, agree=0.821, adj=0.038, (0 split)
##       prdl.my.descr.fctriPadmini 2+#1:D.TfIdf.sum.stem.stop.Ratio < 0.9688673   to the right, agree=0.821, adj=0.038, (0 split)
## 
## Node number 6: 80 observations
##   predicted class=N  expected loss=0.125  P(node) =0.08255934
##     class counts:    70    10
##    probabilities: 0.875 0.125 
## 
## Node number 7: 350 observations
##   predicted class=Y  expected loss=0.07714286  P(node) =0.3611971
##     class counts:    27   323
##    probabilities: 0.077 0.923 
## 
## n= 969 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 969 447 N (0.53869969 0.46130031)  
##   2) biddable< 0.5 539 114 N (0.78849722 0.21150278) *
##   3) biddable>=0.5 430  97 Y (0.22558140 0.77441860)  
##     6) startprice.diff>=63.51092 80  10 N (0.87500000 0.12500000) *
##     7) startprice.diff< 63.51092 350  27 Y (0.07714286 0.92285714) *
## [1] "    calling mypredict_mdl for fit:"

##    threshold   f.score
## 1        0.0 0.6313559
## 2        0.1 0.6313559
## 3        0.2 0.6541916
## 4        0.3 0.8105395
## 5        0.4 0.8105395
## 6        0.5 0.8105395
## 7        0.6 0.8105395
## 8        0.7 0.8105395
## 9        0.8 0.8105395
## 10       0.9 0.8105395
## 11       1.0 0.0000000

## [1] "Classifier Probability Threshold: 0.9000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.All.Interact.X.no.rnorm.rpart.N
## 1         N                                               495
## 2         Y                                               124
##   sold.fctr.predict.All.Interact.X.no.rnorm.rpart.Y
## 1                                                27
## 2                                               323
##          Prediction
## Reference   N   Y
##         N 495  27
##         Y 124 323
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.441692e-01   6.814949e-01   8.197763e-01   8.664485e-01   5.386997e-01 
## AccuracyPValue  McnemarPValue 
##   1.762753e-90   5.612287e-15 
## [1] "    calling mypredict_mdl for OOB:"

##    threshold   f.score
## 1        0.0 0.6339217
## 2        0.1 0.6339217
## 3        0.2 0.6633907
## 4        0.3 0.8102981
## 5        0.4 0.8102981
## 6        0.5 0.8102981
## 7        0.6 0.8102981
## 8        0.7 0.8102981
## 9        0.8 0.8102981
## 10       0.9 0.8102981
## 11       1.0 0.0000000

## [1] "Classifier Probability Threshold: 0.9000 to maximize f.score.OOB"
##   sold.fctr sold.fctr.predict.All.Interact.X.no.rnorm.rpart.N
## 1         N                                               451
## 2         Y                                               114
##   sold.fctr.predict.All.Interact.X.no.rnorm.rpart.Y
## 1                                                26
## 2                                               299
##          Prediction
## Reference   N   Y
##         N 451  26
##         Y 114 299
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.426966e-01   6.791719e-01   8.170871e-01   8.660125e-01   5.359551e-01 
## AccuracyPValue  McnemarPValue 
##   1.090657e-83   1.940362e-13 
##                        model_id model_method
## 1 All.Interact.X.no.rnorm.rpart        rpart
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           feats
## 1 D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
##   max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1               3                      2.442                 0.166
##   max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1   0.8434283                    0.9       0.8105395        0.8328173
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1             0.8197763             0.8664485     0.6623529   0.8469855
##   opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1                    0.9       0.8102981        0.8426966
##   max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1             0.8170871             0.8660125     0.6791719
##   max.AccuracySD.fit max.KappaSD.fit
## 1        0.008191181      0.01513425
##                 label step_major step_minor     bgn     end elapsed
## 10 fit.models_1_rpart         10          0 186.667 192.962   6.295
## 11    fit.models_1_rf         11          0 192.963      NA      NA
## [1] "fitting model: All.Interact.X.no.rnorm.rf"
## [1] "    indep_vars: D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr"
## Aggregating results
## Selecting tuning parameters
## Fitting mtry = 257 on full training set
## Warning in myfit_mdl(model_id = model_id, model_method = method,
## indep_vars_vctr = indep_vars_vctr, : model's bestTune found at an extreme
## of tuneGrid for parameter: mtry

##                 Length Class      Mode     
## call               4   -none-     call     
## type               1   -none-     character
## predicted        969   factor     numeric  
## err.rate        1500   -none-     numeric  
## confusion          6   -none-     numeric  
## votes           1938   matrix     numeric  
## oob.times        969   -none-     numeric  
## classes            2   -none-     character
## importance       257   -none-     numeric  
## importanceSD       0   -none-     NULL     
## localImportance    0   -none-     NULL     
## proximity          0   -none-     NULL     
## ntree              1   -none-     numeric  
## mtry               1   -none-     numeric  
## forest            14   -none-     list     
## y                969   factor     numeric  
## test               0   -none-     NULL     
## inbag              0   -none-     NULL     
## xNames           257   -none-     character
## problemType        1   -none-     character
## tuneValue          1   data.frame list     
## obsLevels          2   -none-     character
## [1] "    calling mypredict_mdl for fit:"

##    threshold   f.score
## 1        0.0 0.6313559
## 2        0.1 0.8555024
## 3        0.2 0.9440338
## 4        0.3 0.9759825
## 5        0.4 0.9988827
## 6        0.5 1.0000000
## 7        0.6 1.0000000
## 8        0.7 0.9700461
## 9        0.8 0.8949320
## 10       0.9 0.8111702
## 11       1.0 0.4315789

## [1] "Classifier Probability Threshold: 0.6000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.All.Interact.X.no.rnorm.rf.N
## 1         N                                            522
## 2         Y                                             NA
##   sold.fctr.predict.All.Interact.X.no.rnorm.rf.Y
## 1                                             NA
## 2                                            447
##          Prediction
## Reference   N   Y
##         N 522   0
##         Y   0 447
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   1.000000e+00   1.000000e+00   9.962003e-01   1.000000e+00   5.386997e-01 
## AccuracyPValue  McnemarPValue 
##  4.731267e-261            NaN 
## [1] "    calling mypredict_mdl for OOB:"

##    threshold   f.score
## 1        0.0 0.6339217
## 2        0.1 0.7623574
## 3        0.2 0.7877551
## 4        0.3 0.8017525
## 5        0.4 0.8110599
## 6        0.5 0.8206980
## 7        0.6 0.8280255
## 8        0.7 0.7956403
## 9        0.8 0.7768362
## 10       0.9 0.7170953
## 11       1.0 0.3373494

## [1] "Classifier Probability Threshold: 0.6000 to maximize f.score.OOB"
##   sold.fctr sold.fctr.predict.All.Interact.X.no.rnorm.rf.N
## 1         N                                            430
## 2         Y                                             88
##   sold.fctr.predict.All.Interact.X.no.rnorm.rf.Y
## 1                                             47
## 2                                            325
##          Prediction
## Reference   N   Y
##         N 430  47
##         Y  88 325
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.483146e-01   6.930078e-01   8.230374e-01   8.712595e-01   5.359551e-01 
## AccuracyPValue  McnemarPValue 
##   4.580090e-87   5.760403e-04 
##                     model_id model_method
## 1 All.Interact.X.no.rnorm.rf           rf
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           feats
## 1 D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
##   max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1               3                     40.125                 17.55
##   max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1           1                    0.6               1        0.8369453
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1             0.9962003                     1     0.6693933   0.9142644
##   opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1                    0.6       0.8280255        0.8483146
##   max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1             0.8230374             0.8712595     0.6930078
##   max.AccuracySD.fit max.KappaSD.fit
## 1         0.01866165      0.03765593
# User specified
#   Ensure at least 2 vars in each regression; else varImp crashes
# sav_models_lst <- glb_models_lst; sav_models_df <- glb_models_df; sav_featsimp_df <- glb_featsimp_df
# glb_models_lst <- sav_models_lst; glb_models_df <- sav_models_df; glb_featsimp_df <- sav_featsimp_df

    # easier to exclude features
# require(gdata) # needed for trim
# model_id <- "";
# indep_vars_vctr <- head(subset(glb_models_df, grepl("All\\.X\\.", model_id), select=feats)
#                         , 1)[, "feats"]
# indep_vars_vctr <- trim(unlist(strsplit(indep_vars_vctr, "[,]")))
# indep_vars_vctr <- setdiff(indep_vars_vctr, ".rnorm")

    # easier to include features
#stop("here"); sav_models_df <- glb_models_df; glb_models_df <- sav_models_df
# !_sp
# model_id <- "csm"; indep_vars_vctr <- c(NULL
#     ,"prdline.my.fctr", "prdline.my.fctr:.clusterid.fctr"
#     ,"prdline.my.fctr*biddable"
#     #,"prdline.my.fctr*startprice.log"
#     #,"prdline.my.fctr*startprice.diff"    
#     #,"prdline.my.fctr*idseq.my"   
#     ,"prdline.my.fctr*condition.fctr"
#     ,"prdline.my.fctr*D.terms.n.post.stop"
#     #,"prdline.my.fctr*D.terms.n.post.stem"
#     ,"prdline.my.fctr*cellular.fctr"    
# #    ,"<feat1>:<feat2>"
#                                            )
# for (method in glb_models_method_vctr) {
#     ret_lst <- myfit_mdl(model_id=model_id, model_method=method,
#                                 indep_vars_vctr=indep_vars_vctr,
#                                 model_type=glb_model_type,
#                                 rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out,
#                                 fit_df=glb_fitobs_df, OOB_df=glb_OOBobs_df,
#                     n_cv_folds=glb_n_cv_folds, tune_models_df=glb_tune_models_df)
#     csm_mdl_id <- paste0(model_id, ".", method)
#     csm_featsimp_df <- myget_feats_importance(glb_models_lst[[paste0(model_id, ".",
#                                                                      method)]]);               print(head(csm_featsimp_df))
# }
###

# Ntv.1.lm <- lm(reformulate(indep_vars_vctr, glb_rsp_var), glb_trnobs_df); print(summary(Ntv.1.lm))

#csm_featsimp_df[grepl("H.npnct19.log", row.names(csm_featsimp_df)), , FALSE]
#print(sprintf("%s OOB confusion matrix & accuracy: ", csm_mdl_id)); print(t(confusionMatrix(csm_OOBobs_df[, paste0(glb_rsp_var_out, csm_mdl_id)], csm_OOBobs_df[, glb_rsp_var])$table))

#glb_models_df[, "max.Accuracy.OOB", FALSE]
#varImp(glb_models_lst[["Low.cor.X.glm"]])
#orderBy(~ -Overall, varImp(glb_models_lst[["All.X.2.glm"]])$importance)
#orderBy(~ -Overall, varImp(glb_models_lst[["All.X.3.glm"]])$importance)
#glb_feats_df[grepl("npnct28", glb_feats_df$id), ]

    # User specified bivariate models
#     indep_vars_vctr_lst <- list()
#     for (feat in setdiff(names(glb_fitobs_df), 
#                          union(glb_rsp_var, glb_exclude_vars_as_features)))
#         indep_vars_vctr_lst[[feat]] <- feat

    # User specified combinatorial models
#     indep_vars_vctr_lst <- list()
#     combn_mtrx <- combn(c("<feat1_name>", "<feat2_name>", "<featn_name>"), 
#                           <num_feats_to_choose>)
#     for (combn_ix in 1:ncol(combn_mtrx))
#         #print(combn_mtrx[, combn_ix])
#         indep_vars_vctr_lst[[combn_ix]] <- combn_mtrx[, combn_ix]
    
    # template for myfit_mdl
    #   rf is hard-coded in caret to recognize only Accuracy / Kappa evaluation metrics
    #       only for OOB in trainControl ?
    
#     ret_lst <- myfit_mdl(model_id=paste0(model_id_pfx, ""), model_method=method,
#                          indep_vars_vctr=indep_vars_vctr,
#                          rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out,
#                          fit_df=glb_fitobs_df, OOB_df=glb_OOBobs_df,
#                          n_cv_folds=glb_n_cv_folds, tune_models_df=glb_tune_models_df,
#                          model_loss_mtrx=glb_model_metric_terms,
#                          model_summaryFunction=glb_model_metric_smmry,
#                          model_metric=glb_model_metric,
#                          model_metric_maximize=glb_model_metric_maximize)

# Simplify a model
# fit_df <- glb_fitobs_df; glb_mdl <- step(<complex>_mdl)

# Non-caret models
#     rpart_area_mdl <- rpart(reformulate("Area", response=glb_rsp_var), 
#                                data=glb_fitobs_df, #method="class", 
#                                control=rpart.control(cp=0.12),
#                            parms=list(loss=glb_model_metric_terms))
#     print("rpart_area_mdl"); prp(rpart_area_mdl)
# 

# Print the model summary table glb_models_df. The output that follows lists,
# per model_id, the model_method, the feats used, tuning/elapsed-time figures,
# and the fit and OOB metrics (auc, f.score, Accuracy, Kappa).
print(glb_models_df)
##                                                    model_id
## MFO.myMFO_classfr                         MFO.myMFO_classfr
## Random.myrandom_classfr             Random.myrandom_classfr
## Max.cor.Y.cv.0.rpart                   Max.cor.Y.cv.0.rpart
## Max.cor.Y.cv.0.cp.0.rpart         Max.cor.Y.cv.0.cp.0.rpart
## Max.cor.Y.rpart                             Max.cor.Y.rpart
## Max.cor.Y.glm                                 Max.cor.Y.glm
## Interact.High.cor.Y.glm             Interact.High.cor.Y.glm
## Low.cor.X.glm                                 Low.cor.X.glm
## All.X.glm                                         All.X.glm
## All.X.bayesglm                               All.X.bayesglm
## All.X.glmnet                                   All.X.glmnet
## All.X.no.rnorm.rpart                   All.X.no.rnorm.rpart
## All.X.no.rnorm.rf                         All.X.no.rnorm.rf
## All.Interact.X.glm                       All.Interact.X.glm
## All.Interact.X.bayesglm             All.Interact.X.bayesglm
## All.Interact.X.glmnet                 All.Interact.X.glmnet
## All.Interact.X.no.rnorm.rpart All.Interact.X.no.rnorm.rpart
## All.Interact.X.no.rnorm.rf       All.Interact.X.no.rnorm.rf
##                                   model_method
## MFO.myMFO_classfr                myMFO_classfr
## Random.myrandom_classfr       myrandom_classfr
## Max.cor.Y.cv.0.rpart                     rpart
## Max.cor.Y.cv.0.cp.0.rpart                rpart
## Max.cor.Y.rpart                          rpart
## Max.cor.Y.glm                              glm
## Interact.High.cor.Y.glm                    glm
## Low.cor.X.glm                              glm
## All.X.glm                                  glm
## All.X.bayesglm                        bayesglm
## All.X.glmnet                            glmnet
## All.X.no.rnorm.rpart                     rpart
## All.X.no.rnorm.rf                           rf
## All.Interact.X.glm                         glm
## All.Interact.X.bayesglm               bayesglm
## All.Interact.X.glmnet                   glmnet
## All.Interact.X.no.rnorm.rpart            rpart
## All.Interact.X.no.rnorm.rf                  rf
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               feats
## MFO.myMFO_classfr                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            .rnorm
## Random.myrandom_classfr                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      .rnorm
## Max.cor.Y.cv.0.rpart                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      biddable, startprice.diff
## Max.cor.Y.cv.0.cp.0.rpart                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 biddable, startprice.diff
## Max.cor.Y.rpart                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           biddable, startprice.diff
## Max.cor.Y.glm                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             biddable, startprice.diff
## Interact.High.cor.Y.glm                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 biddable, startprice.diff, biddable:D.terms.n.post.stop, biddable:D.TfIdf.sum.post.stem, biddable:D.ratio.nstopwrds.nwrds, biddable:D.npnct06.log, biddable:D.nchrs.log, biddable:D.terms.n.post.stop.log, biddable:cellular.fctr, biddable:D.nwrds.unq.log
## Low.cor.X.glm                                                                                                                                                                                                                                                                                                                                                                                                                                                   biddable, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, prdl.my.descr.fctr, color.fctr, D.npnct08.log, D.npnct06.log, D.npnct28.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.X.glm                                                                                                                                                        biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.X.bayesglm                                                                                                                                                   biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.X.glmnet                                                                                                                                                     biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.X.no.rnorm.rpart                                                                                                                                                     biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.X.no.rnorm.rf                                                                                                                                                        biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.Interact.X.glm            D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## All.Interact.X.bayesglm       D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## All.Interact.X.glmnet         D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## All.Interact.X.no.rnorm.rpart         D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## All.Interact.X.no.rnorm.rf            D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
##                               max.nTuningRuns min.elapsedtime.everything
## MFO.myMFO_classfr                           0                      0.367
## Random.myrandom_classfr                     0                      0.257
## Max.cor.Y.cv.0.rpart                        0                      0.598
## Max.cor.Y.cv.0.cp.0.rpart                   0                      0.471
## Max.cor.Y.rpart                             3                      0.959
## Max.cor.Y.glm                               1                      0.955
## Interact.High.cor.Y.glm                     1                      0.991
## Low.cor.X.glm                               1                      1.741
## All.X.glm                                   1                      2.172
## All.X.bayesglm                              1                      3.299
## All.X.glmnet                                9                      6.992
## All.X.no.rnorm.rpart                        3                      1.834
## All.X.no.rnorm.rf                           3                     19.691
## All.Interact.X.glm                          1                      5.369
## All.Interact.X.bayesglm                     1                      6.633
## All.Interact.X.glmnet                       9                     14.868
## All.Interact.X.no.rnorm.rpart               3                      2.442
## All.Interact.X.no.rnorm.rf                  3                     40.125
##                               min.elapsedtime.final max.auc.fit
## MFO.myMFO_classfr                             0.003   0.5000000
## Random.myrandom_classfr                       0.002   0.4960722
## Max.cor.Y.cv.0.rpart                          0.012   0.5000000
## Max.cor.Y.cv.0.cp.0.rpart                     0.008   0.9238966
## Max.cor.Y.rpart                               0.012   0.8434283
## Max.cor.Y.glm                                 0.013   0.8591461
## Interact.High.cor.Y.glm                       0.015   0.8617390
## Low.cor.X.glm                                 0.390   0.9028388
## All.X.glm                                     0.506   0.9069917
## All.X.bayesglm                                0.772   0.9042146
## All.X.glmnet                                  1.523   0.8677904
## All.X.no.rnorm.rpart                          0.096   0.8434283
## All.X.no.rnorm.rf                             7.227   1.0000000
## All.Interact.X.glm                            2.009   0.7954370
## All.Interact.X.bayesglm                       2.293   0.9286388
## All.Interact.X.glmnet                         1.855   0.8822461
## All.Interact.X.no.rnorm.rpart                 0.166   0.8434283
## All.Interact.X.no.rnorm.rf                   17.550   1.0000000
##                               opt.prob.threshold.fit max.f.score.fit
## MFO.myMFO_classfr                                0.5       0.0000000
## Random.myrandom_classfr                          0.4       0.6313559
## Max.cor.Y.cv.0.rpart                             0.5       0.0000000
## Max.cor.Y.cv.0.cp.0.rpart                        0.4       0.8603352
## Max.cor.Y.rpart                                  0.9       0.8105395
## Max.cor.Y.glm                                    0.7       0.8059333
## Interact.High.cor.Y.glm                          0.6       0.7908102
## Low.cor.X.glm                                    0.5       0.8101852
## All.X.glm                                        0.5       0.8258362
## All.X.bayesglm                                   0.5       0.8175520
## All.X.glmnet                                     0.6       0.8029021
## All.X.no.rnorm.rpart                             0.9       0.8105395
## All.X.no.rnorm.rf                                0.6       1.0000000
## All.Interact.X.glm                               0.9       0.7926829
## All.Interact.X.bayesglm                          0.4       0.8505747
## All.Interact.X.glmnet                            0.6       0.8064516
## All.Interact.X.no.rnorm.rpart                    0.9       0.8105395
## All.Interact.X.no.rnorm.rf                       0.6       1.0000000
##                               max.Accuracy.fit max.AccuracyLower.fit
## MFO.myMFO_classfr                    0.5386997             0.5067192
## Random.myrandom_classfr              0.4613003             0.4295557
## Max.cor.Y.cv.0.rpart                 0.5386997             0.5067192
## Max.cor.Y.cv.0.cp.0.rpart            0.8710010             0.8482486
## Max.cor.Y.rpart                      0.8276574             0.8197763
## Max.cor.Y.glm                        0.7987616             0.8132413
## Interact.High.cor.Y.glm              0.7997936             0.7958716
## Low.cor.X.glm                        0.7688338             0.8056321
## All.X.glm                            0.7647059             0.8197763
## All.X.bayesglm                       0.7770898             0.8121533
## All.X.glmnet                         0.8008256             0.8067182
## All.X.no.rnorm.rpart                 0.8338493             0.8197763
## All.X.no.rnorm.rf                    0.8482972             0.9962003
## All.Interact.X.glm                   0.7471620             0.7624276
## All.Interact.X.bayesglm              0.7925697             0.8427522
## All.Interact.X.glmnet                0.8307534             0.8143296
## All.Interact.X.no.rnorm.rpart        0.8328173             0.8197763
## All.Interact.X.no.rnorm.rf           0.8369453             0.9962003
##                               max.AccuracyUpper.fit max.Kappa.fit
## MFO.myMFO_classfr                         0.5704443     0.0000000
## Random.myrandom_classfr                   0.4932808     0.0000000
## Max.cor.Y.cv.0.rpart                      0.5704443     0.0000000
## Max.cor.Y.cv.0.cp.0.rpart                 0.8914697     0.7404889
## Max.cor.Y.rpart                           0.8664485     0.6497643
## Max.cor.Y.glm                             0.8606386     0.5929577
## Interact.High.cor.Y.glm                   0.8450880     0.5943920
## Low.cor.X.glm                             0.8538452     0.5344407
## All.X.glm                                 0.8664485     0.5250728
## All.X.bayesglm                            0.8596691     0.5506703
## All.X.glmnet                              0.8548166     0.5969700
## All.X.no.rnorm.rpart                      0.8664485     0.6645079
## All.X.no.rnorm.rf                         1.0000000     0.6925622
## All.Interact.X.glm                        0.8147476     0.4884840
## All.Interact.X.bayesglm                   0.8866791     0.5800970
## All.Interact.X.glmnet                     0.8616078     0.6555776
## All.Interact.X.no.rnorm.rpart             0.8664485     0.6623529
## All.Interact.X.no.rnorm.rf                1.0000000     0.6693933
##                               max.auc.OOB opt.prob.threshold.OOB
## MFO.myMFO_classfr               0.5000000                    0.5
## Random.myrandom_classfr         0.5185354                    0.4
## Max.cor.Y.cv.0.rpart            0.5000000                    0.5
## Max.cor.Y.cv.0.cp.0.rpart       0.8997924                    0.3
## Max.cor.Y.rpart                 0.8469855                    0.9
## Max.cor.Y.glm                   0.8659702                    0.7
## Interact.High.cor.Y.glm         0.8576352                    0.6
## Low.cor.X.glm                   0.8382546                    0.5
## All.X.glm                       0.8308232                    0.5
## All.X.bayesglm                  0.8427064                    0.5
## All.X.glmnet                    0.8560007                    0.7
## All.X.no.rnorm.rpart            0.8469855                    0.9
## All.X.no.rnorm.rf               0.9180131                    0.5
## All.Interact.X.glm              0.6856640                    0.9
## All.Interact.X.bayesglm         0.8660362                    0.5
## All.Interact.X.glmnet           0.8742088                    0.6
## All.Interact.X.no.rnorm.rpart   0.8469855                    0.9
## All.Interact.X.no.rnorm.rf      0.9142644                    0.6
##                               max.f.score.OOB max.Accuracy.OOB
## MFO.myMFO_classfr                   0.0000000        0.5359551
## Random.myrandom_classfr             0.6339217        0.4640449
## Max.cor.Y.cv.0.rpart                0.0000000        0.5359551
## Max.cor.Y.cv.0.cp.0.rpart           0.8130841        0.8202247
## Max.cor.Y.rpart                     0.8102981        0.8426966
## Max.cor.Y.glm                       0.8047809        0.8348315
## Interact.High.cor.Y.glm             0.7865459        0.8146067
## Low.cor.X.glm                       0.7600487        0.7786517
## All.X.glm                           0.7545788        0.7741573
## All.X.bayesglm                      0.7641278        0.7842697
## All.X.glmnet                        0.7956104        0.8325843
## All.X.no.rnorm.rpart                0.8102981        0.8426966
## All.X.no.rnorm.rf                   0.8294479        0.8438202
## All.Interact.X.glm                  0.6898803        0.6797753
## All.Interact.X.bayesglm             0.7949367        0.8179775
## All.Interact.X.glmnet               0.8058511        0.8359551
## All.Interact.X.no.rnorm.rpart       0.8102981        0.8426966
## All.Interact.X.no.rnorm.rf          0.8280255        0.8483146
##                               max.AccuracyLower.OOB max.AccuracyUpper.OOB
## MFO.myMFO_classfr                         0.5025561             0.5691153
## Random.myrandom_classfr                   0.4308847             0.4974439
## Max.cor.Y.cv.0.rpart                      0.5025561             0.5691153
## Max.cor.Y.cv.0.cp.0.rpart                 0.7933882             0.8449213
## Max.cor.Y.rpart                           0.8170871             0.8660125
## Max.cor.Y.glm                             0.8087745             0.8586487
## Interact.High.cor.Y.glm                   0.7874870             0.8396247
## Low.cor.X.glm                             0.7499158             0.8055293
## All.X.glm                                 0.7452413             0.8012453
## All.X.bayesglm                            0.7557654             0.8108777
## All.X.glmnet                              0.8064031             0.8565410
## All.X.no.rnorm.rpart                      0.8170871             0.8660125
## All.X.no.rnorm.rf                         0.8182763             0.8670627
## All.Interact.X.glm                        0.6480036             0.7103515
## All.Interact.X.bayesglm                   0.7910266             0.8428037
## All.Interact.X.glmnet                     0.8099608             0.8597019
## All.Interact.X.no.rnorm.rpart             0.8170871             0.8660125
## All.Interact.X.no.rnorm.rf                0.8230374             0.8712595
##                               max.Kappa.OOB max.AccuracySD.fit
## MFO.myMFO_classfr                 0.0000000                 NA
## Random.myrandom_classfr           0.0000000                 NA
## Max.cor.Y.cv.0.rpart              0.0000000                 NA
## Max.cor.Y.cv.0.cp.0.rpart         0.6403332                 NA
## Max.cor.Y.rpart                   0.6791719        0.010872708
## Max.cor.Y.glm                     0.6639612        0.038792458
## Interact.High.cor.Y.glm           0.6240496        0.037194678
## Low.cor.X.glm                     0.5546405        0.019662083
## All.X.glm                         0.5454499        0.029856504
## All.X.bayesglm                    0.5654496        0.034475321
## All.X.glmnet                      0.6580401        0.036936079
## All.X.no.rnorm.rpart              0.6791719        0.008937311
## All.X.no.rnorm.rf                 0.6854548        0.008191181
## All.Interact.X.glm                0.3658021        0.013960526
## All.Interact.X.bayesglm           0.6319103        0.035163519
## All.Interact.X.glmnet             0.6661923        0.027921051
## All.Interact.X.no.rnorm.rpart     0.6791719        0.008191181
## All.Interact.X.no.rnorm.rf        0.6930078        0.018661653
##                               max.KappaSD.fit min.aic.fit
## MFO.myMFO_classfr                          NA          NA
## Random.myrandom_classfr                    NA          NA
## Max.cor.Y.cv.0.rpart                       NA          NA
## Max.cor.Y.cv.0.cp.0.rpart                  NA          NA
## Max.cor.Y.rpart                    0.02515063          NA
## Max.cor.Y.glm                      0.07822035    883.4623
## Interact.High.cor.Y.glm            0.07559072    887.8417
## Low.cor.X.glm                      0.03929384    914.1270
## All.X.glm                          0.05995988    931.5575
## All.X.bayesglm                     0.06919184   1056.6761
## All.X.glmnet                       0.07483640          NA
## All.X.no.rnorm.rpart               0.01629107          NA
## All.X.no.rnorm.rf                  0.01601525          NA
## All.Interact.X.glm                 0.02568968  14993.8106
## All.Interact.X.bayesglm            0.07034406   1164.3831
## All.Interact.X.glmnet              0.05688346          NA
## All.Interact.X.no.rnorm.rpart      0.01513425          NA
## All.Interact.X.no.rnorm.rf         0.03765593          NA
# Remove the `ret_lst` object from the workspace.
rm(list = "ret_lst")

# Log the "fit.models_1_end" step in the fit.models_1 chunk table; with
# major.inc=TRUE the printed log shows step_major advancing (11 -> 12).
fit.models_1_chunk_df <- 
    myadd_chunk(fit.models_1_chunk_df, "fit.models_1_end", major.inc = TRUE)
##               label step_major step_minor     bgn     end elapsed
## 11  fit.models_1_rf         11          0 192.963 236.646  43.683
## 12 fit.models_1_end         12          0 236.647      NA      NA
# Log another "fit.models" step in the global chunk table. With major.inc=FALSE
# the printed log shows step_major unchanged (7) and step_minor advancing (1 -> 2).
glb_chunks_df <- myadd_chunk(glb_chunks_df, "fit.models", major.inc=FALSE)
##         label step_major step_minor     bgn     end elapsed
## 11 fit.models          7          1  92.533 236.654 144.122
## 12 fit.models          7          2 236.655      NA      NA
# When glb_model_metric_smmry is set (non-NULL), re-score every model listed in
# glb_models_df on the fit and OOB observations via mypredict_mdl(...,
# ret_type="stats") and replace the glb_model_metric columns of glb_models_df
# with the freshly computed ones.
# NOTE(review): mypredict_mdl is defined elsewhere; this block assumes it
# returns a data frame carrying a `model_id` column plus stats columns -- confirm.
if (!is.null(glb_model_metric_smmry)) {
    stats_df <- glb_models_df[, "model_id", FALSE]

    # The fit and OOB passes differ only in the observations and the label
    # handed to mypredict_mdl, so run them through one loop.
    obs_lst <- list(fit = glb_fitobs_df, OOB = glb_OOBobs_df)
    for (obs_label in names(obs_lst)) {
        # Collect per-model stats in a list and bind once, instead of growing
        # a data frame with rbind() on every iteration.
        stats_lst <- list()
        for (model_id in stats_df$model_id) {
            stats_lst[[length(stats_lst) + 1]] <- 
                mypredict_mdl(glb_models_lst[[model_id]], obs_lst[[obs_label]], 
                              glb_rsp_var, glb_rsp_var_out, model_id, obs_label,
                              glb_model_metric_smmry, glb_model_metric, 
                              glb_model_metric_maximize, ret_type="stats")
        }
        # Leading empty data frame keeps rbind.data.frame semantics (and an
        # empty-data-frame result when there are no models), as before.
        stats_mdl_df <- do.call(rbind, c(list(data.frame()), stats_lst))
        stats_df <- merge(stats_df, stats_mdl_df, all.x=TRUE)
    }

    # Columns of stats_df whose names match the metric (regex match, as before).
    metric_idx <- grep(glb_model_metric, names(stats_df))
    metric_cols <- names(stats_df)[metric_idx]

    print("Merging following data into glb_models_df:")
    # Column 1 is the merge key (model_id); drop=FALSE keeps a data frame even
    # when no metric column matched.
    print(stats_mrg_df <- stats_df[, c(1, metric_idx), drop=FALSE])
    print(tmp_models_df <- orderBy(~model_id, 
                                   glb_models_df[, c("model_id", metric_cols)]))

    # Everything except the metric columns, with model_id first. model_id is
    # excluded from the setdiff so it is selected exactly once; the previous
    # code selected it twice and then had to drop the auto-renamed duplicate
    # `model_id.1`.
    other_cols <- setdiff(names(glb_models_df), c("model_id", metric_cols))
    tmp2_models_df <- glb_models_df[, c("model_id", other_cols), drop=FALSE]
    tmp3_models_df <- merge(tmp2_models_df, stats_mrg_df, all.x=TRUE, sort=FALSE)
    print(tmp3_models_df)
    print(names(tmp3_models_df))
    print(glb_models_df <- tmp3_models_df)
}

# Build the data frame used for plotting model comparisons:
#   1. drop every column whose name contains "SD", "Upper" or "Lower";
#   2. replace each "min.*" column by its reciprocal, stored as "inv.*"
#      (appended as a new column), and remove the original "min.*" column.
# A logical mask is used instead of negative grep() indices: when nothing
# matches, -integer(0) would select *no* columns rather than all of them.
keep_cols <- !grepl("SD|Upper|Lower", names(glb_models_df))
plt_models_df <- glb_models_df[, keep_cols, drop=FALSE]

# "\\." matches a literal dot, so only names that really start with "min."
# are picked up.
for (var in grep("^min\\.", names(plt_models_df), value=TRUE)) {
    # NA inputs stay NA and zeros become Inf under plain division.
    plt_models_df[, sub("^min\\.", "inv.", var)] <- 1.0 / plt_models_df[, var]
    # Remove exactly this column by name; using the name as a regex pattern
    # could also match (and drop) other columns.
    plt_models_df[[var]] <- NULL
}
print(plt_models_df)
##                                                    model_id
## MFO.myMFO_classfr                         MFO.myMFO_classfr
## Random.myrandom_classfr             Random.myrandom_classfr
## Max.cor.Y.cv.0.rpart                   Max.cor.Y.cv.0.rpart
## Max.cor.Y.cv.0.cp.0.rpart         Max.cor.Y.cv.0.cp.0.rpart
## Max.cor.Y.rpart                             Max.cor.Y.rpart
## Max.cor.Y.glm                                 Max.cor.Y.glm
## Interact.High.cor.Y.glm             Interact.High.cor.Y.glm
## Low.cor.X.glm                                 Low.cor.X.glm
## All.X.glm                                         All.X.glm
## All.X.bayesglm                               All.X.bayesglm
## All.X.glmnet                                   All.X.glmnet
## All.X.no.rnorm.rpart                   All.X.no.rnorm.rpart
## All.X.no.rnorm.rf                         All.X.no.rnorm.rf
## All.Interact.X.glm                       All.Interact.X.glm
## All.Interact.X.bayesglm             All.Interact.X.bayesglm
## All.Interact.X.glmnet                 All.Interact.X.glmnet
## All.Interact.X.no.rnorm.rpart All.Interact.X.no.rnorm.rpart
## All.Interact.X.no.rnorm.rf       All.Interact.X.no.rnorm.rf
##                                   model_method
## MFO.myMFO_classfr                myMFO_classfr
## Random.myrandom_classfr       myrandom_classfr
## Max.cor.Y.cv.0.rpart                     rpart
## Max.cor.Y.cv.0.cp.0.rpart                rpart
## Max.cor.Y.rpart                          rpart
## Max.cor.Y.glm                              glm
## Interact.High.cor.Y.glm                    glm
## Low.cor.X.glm                              glm
## All.X.glm                                  glm
## All.X.bayesglm                        bayesglm
## All.X.glmnet                            glmnet
## All.X.no.rnorm.rpart                     rpart
## All.X.no.rnorm.rf                           rf
## All.Interact.X.glm                         glm
## All.Interact.X.bayesglm               bayesglm
## All.Interact.X.glmnet                   glmnet
## All.Interact.X.no.rnorm.rpart            rpart
## All.Interact.X.no.rnorm.rf                  rf
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               feats
## MFO.myMFO_classfr                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            .rnorm
## Random.myrandom_classfr                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      .rnorm
## Max.cor.Y.cv.0.rpart                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      biddable, startprice.diff
## Max.cor.Y.cv.0.cp.0.rpart                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 biddable, startprice.diff
## Max.cor.Y.rpart                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           biddable, startprice.diff
## Max.cor.Y.glm                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             biddable, startprice.diff
## Interact.High.cor.Y.glm                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 biddable, startprice.diff, biddable:D.terms.n.post.stop, biddable:D.TfIdf.sum.post.stem, biddable:D.ratio.nstopwrds.nwrds, biddable:D.npnct06.log, biddable:D.nchrs.log, biddable:D.terms.n.post.stop.log, biddable:cellular.fctr, biddable:D.nwrds.unq.log
## Low.cor.X.glm                                                                                                                                                                                                                                                                                                                                                                                                                                                   biddable, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, prdl.my.descr.fctr, color.fctr, D.npnct08.log, D.npnct06.log, D.npnct28.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.X.glm                                                                                                                                                        biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.X.bayesglm                                                                                                                                                   biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.X.glmnet                                                                                                                                                     biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.X.no.rnorm.rpart                                                                                                                                                     biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.X.no.rnorm.rf                                                                                                                                                        biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.Interact.X.glm            D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## All.Interact.X.bayesglm       D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## All.Interact.X.glmnet         D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## All.Interact.X.no.rnorm.rpart         D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## All.Interact.X.no.rnorm.rf            D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
##                               max.nTuningRuns max.auc.fit
## MFO.myMFO_classfr                           0   0.5000000
## Random.myrandom_classfr                     0   0.4960722
## Max.cor.Y.cv.0.rpart                        0   0.5000000
## Max.cor.Y.cv.0.cp.0.rpart                   0   0.9238966
## Max.cor.Y.rpart                             3   0.8434283
## Max.cor.Y.glm                               1   0.8591461
## Interact.High.cor.Y.glm                     1   0.8617390
## Low.cor.X.glm                               1   0.9028388
## All.X.glm                                   1   0.9069917
## All.X.bayesglm                              1   0.9042146
## All.X.glmnet                                9   0.8677904
## All.X.no.rnorm.rpart                        3   0.8434283
## All.X.no.rnorm.rf                           3   1.0000000
## All.Interact.X.glm                          1   0.7954370
## All.Interact.X.bayesglm                     1   0.9286388
## All.Interact.X.glmnet                       9   0.8822461
## All.Interact.X.no.rnorm.rpart               3   0.8434283
## All.Interact.X.no.rnorm.rf                  3   1.0000000
##                               opt.prob.threshold.fit max.f.score.fit
## MFO.myMFO_classfr                                0.5       0.0000000
## Random.myrandom_classfr                          0.4       0.6313559
## Max.cor.Y.cv.0.rpart                             0.5       0.0000000
## Max.cor.Y.cv.0.cp.0.rpart                        0.4       0.8603352
## Max.cor.Y.rpart                                  0.9       0.8105395
## Max.cor.Y.glm                                    0.7       0.8059333
## Interact.High.cor.Y.glm                          0.6       0.7908102
## Low.cor.X.glm                                    0.5       0.8101852
## All.X.glm                                        0.5       0.8258362
## All.X.bayesglm                                   0.5       0.8175520
## All.X.glmnet                                     0.6       0.8029021
## All.X.no.rnorm.rpart                             0.9       0.8105395
## All.X.no.rnorm.rf                                0.6       1.0000000
## All.Interact.X.glm                               0.9       0.7926829
## All.Interact.X.bayesglm                          0.4       0.8505747
## All.Interact.X.glmnet                            0.6       0.8064516
## All.Interact.X.no.rnorm.rpart                    0.9       0.8105395
## All.Interact.X.no.rnorm.rf                       0.6       1.0000000
##                               max.Accuracy.fit max.Kappa.fit max.auc.OOB
## MFO.myMFO_classfr                    0.5386997     0.0000000   0.5000000
## Random.myrandom_classfr              0.4613003     0.0000000   0.5185354
## Max.cor.Y.cv.0.rpart                 0.5386997     0.0000000   0.5000000
## Max.cor.Y.cv.0.cp.0.rpart            0.8710010     0.7404889   0.8997924
## Max.cor.Y.rpart                      0.8276574     0.6497643   0.8469855
## Max.cor.Y.glm                        0.7987616     0.5929577   0.8659702
## Interact.High.cor.Y.glm              0.7997936     0.5943920   0.8576352
## Low.cor.X.glm                        0.7688338     0.5344407   0.8382546
## All.X.glm                            0.7647059     0.5250728   0.8308232
## All.X.bayesglm                       0.7770898     0.5506703   0.8427064
## All.X.glmnet                         0.8008256     0.5969700   0.8560007
## All.X.no.rnorm.rpart                 0.8338493     0.6645079   0.8469855
## All.X.no.rnorm.rf                    0.8482972     0.6925622   0.9180131
## All.Interact.X.glm                   0.7471620     0.4884840   0.6856640
## All.Interact.X.bayesglm              0.7925697     0.5800970   0.8660362
## All.Interact.X.glmnet                0.8307534     0.6555776   0.8742088
## All.Interact.X.no.rnorm.rpart        0.8328173     0.6623529   0.8469855
## All.Interact.X.no.rnorm.rf           0.8369453     0.6693933   0.9142644
##                               opt.prob.threshold.OOB max.f.score.OOB
## MFO.myMFO_classfr                                0.5       0.0000000
## Random.myrandom_classfr                          0.4       0.6339217
## Max.cor.Y.cv.0.rpart                             0.5       0.0000000
## Max.cor.Y.cv.0.cp.0.rpart                        0.3       0.8130841
## Max.cor.Y.rpart                                  0.9       0.8102981
## Max.cor.Y.glm                                    0.7       0.8047809
## Interact.High.cor.Y.glm                          0.6       0.7865459
## Low.cor.X.glm                                    0.5       0.7600487
## All.X.glm                                        0.5       0.7545788
## All.X.bayesglm                                   0.5       0.7641278
## All.X.glmnet                                     0.7       0.7956104
## All.X.no.rnorm.rpart                             0.9       0.8102981
## All.X.no.rnorm.rf                                0.5       0.8294479
## All.Interact.X.glm                               0.9       0.6898803
## All.Interact.X.bayesglm                          0.5       0.7949367
## All.Interact.X.glmnet                            0.6       0.8058511
## All.Interact.X.no.rnorm.rpart                    0.9       0.8102981
## All.Interact.X.no.rnorm.rf                       0.6       0.8280255
##                               max.Accuracy.OOB max.Kappa.OOB
## MFO.myMFO_classfr                    0.5359551     0.0000000
## Random.myrandom_classfr              0.4640449     0.0000000
## Max.cor.Y.cv.0.rpart                 0.5359551     0.0000000
## Max.cor.Y.cv.0.cp.0.rpart            0.8202247     0.6403332
## Max.cor.Y.rpart                      0.8426966     0.6791719
## Max.cor.Y.glm                        0.8348315     0.6639612
## Interact.High.cor.Y.glm              0.8146067     0.6240496
## Low.cor.X.glm                        0.7786517     0.5546405
## All.X.glm                            0.7741573     0.5454499
## All.X.bayesglm                       0.7842697     0.5654496
## All.X.glmnet                         0.8325843     0.6580401
## All.X.no.rnorm.rpart                 0.8426966     0.6791719
## All.X.no.rnorm.rf                    0.8438202     0.6854548
## All.Interact.X.glm                   0.6797753     0.3658021
## All.Interact.X.bayesglm              0.8179775     0.6319103
## All.Interact.X.glmnet                0.8359551     0.6661923
## All.Interact.X.no.rnorm.rpart        0.8426966     0.6791719
## All.Interact.X.no.rnorm.rf           0.8483146     0.6930078
##                               inv.elapsedtime.everything
## MFO.myMFO_classfr                             2.72479564
## Random.myrandom_classfr                       3.89105058
## Max.cor.Y.cv.0.rpart                          1.67224080
## Max.cor.Y.cv.0.cp.0.rpart                     2.12314225
## Max.cor.Y.rpart                               1.04275287
## Max.cor.Y.glm                                 1.04712042
## Interact.High.cor.Y.glm                       1.00908174
## Low.cor.X.glm                                 0.57438254
## All.X.glm                                     0.46040516
## All.X.bayesglm                                0.30312216
## All.X.glmnet                                  0.14302059
## All.X.no.rnorm.rpart                          0.54525627
## All.X.no.rnorm.rf                             0.05078462
## All.Interact.X.glm                            0.18625442
## All.Interact.X.bayesglm                       0.15076134
## All.Interact.X.glmnet                         0.06725854
## All.Interact.X.no.rnorm.rpart                 0.40950041
## All.Interact.X.no.rnorm.rf                    0.02492212
##                               inv.elapsedtime.final  inv.aic.fit
## MFO.myMFO_classfr                      333.33333333           NA
## Random.myrandom_classfr                500.00000000           NA
## Max.cor.Y.cv.0.rpart                    83.33333333           NA
## Max.cor.Y.cv.0.cp.0.rpart              125.00000000           NA
## Max.cor.Y.rpart                         83.33333333           NA
## Max.cor.Y.glm                           76.92307692 1.131910e-03
## Interact.High.cor.Y.glm                 66.66666667 1.126327e-03
## Low.cor.X.glm                            2.56410256 1.093940e-03
## All.X.glm                                1.97628458 1.073471e-03
## All.X.bayesglm                           1.29533679 9.463638e-04
## All.X.glmnet                             0.65659882           NA
## All.X.no.rnorm.rpart                    10.41666667           NA
## All.X.no.rnorm.rf                        0.13837000           NA
## All.Interact.X.glm                       0.49776008 6.669419e-05
## All.Interact.X.bayesglm                  0.43610990 8.588238e-04
## All.Interact.X.glmnet                    0.53908356           NA
## All.Interact.X.no.rnorm.rpart            6.02409639           NA
## All.Interact.X.no.rnorm.rf               0.05698006           NA
# Draw the radar chart that myplot_radar builds from plt_models_df
print(myplot_radar(radar_inp_df=plt_models_df))
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 18. Consider specifying shapes manually if you must have them.
## Warning: Removed 5 rows containing missing values (geom_path).
## Warning: Removed 175 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_text).
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 18. Consider specifying shapes manually if you must have them.

# print(myplot_radar(radar_inp_df=subset(plt_models_df, 
#         !(model_id %in% grep("random|MFO", plt_models_df$model_id, value=TRUE)))))

# Derive Upper/Lower confidence bounds for every metric that has a <metric>SD column.
# The scaler is the 97.5% t-quantile with df = (number of tuning runs - 1);
#   models with a single tuning run get NA bounds.
glb_models_df <- mutate(glb_models_df,
    max.df = ifelse(max.nTuningRuns > 1, max.nTuningRuns - 1, NA),
    min.sd2ci.scaler = ifelse(is.na(max.df), NA, qt(0.975, max.df)))
for (var in grep("SD", names(glb_models_df), value=TRUE)) {
    # "<prefix>SD<suffix>" -> metric column "<prefix><suffix>" and its bound columns
    var_components <- strsplit(var, "SD")[[1]]
    varActul <- paste(var_components[1],          var_components[2], sep="")
    varUpper <- paste(var_components[1], "Upper", var_components[2], sep="")
    varLower <- paste(var_components[1], "Lower", var_components[2], sep="")
    if (!(varUpper %in% names(glb_models_df))) {
        print(sprintf("var:%s", var))
        # CI half-width depends on the sample size through the t distribution; df=n-1
        glb_models_df[, varUpper] <- glb_models_df[, varActul] +
            glb_models_df[, "min.sd2ci.scaler"] * glb_models_df[, var]
        glb_models_df[, varLower] <- glb_models_df[, varActul] -
            glb_models_df[, "min.sd2ci.scaler"] * glb_models_df[, var]
    } else {
        # CI already exists (the Lower bound is assumed to exist as well): leave it alone
        warning(varUpper, " already exists in glb_models_df")
    }
}
## Warning: max.AccuracyUpper.fit already exists in glb_models_df
## [1] "var:max.KappaSD.fit"
# Collect the metrics that have confidence intervals:
#   plt_models_df   gets model_id + the metric value columns
#   pltCI_models_df gets model_id + the matching Upper / Lower bound columns
plt_models_df <- glb_models_df[, "model_id", drop=FALSE]
pltCI_models_df <- plt_models_df
for (var in grep("Upper", names(glb_models_df), value=TRUE)) {
    var_components <- strsplit(var, "Upper")[[1]]
    # dropping "Upper" from the column name gives the metric column itself
    col_name <- paste(var_components, collapse="")
    plt_models_df[, col_name] <- glb_models_df[, col_name]
    for (name in paste0(var_components[1], c("Upper", "Lower"), var_components[2])) {
        pltCI_models_df[, name] <- glb_models_df[, name]
    }
}

# Melt a per-model metrics frame (keyed by model_id) and split each melted
# variable name "<label>.<data>" into two extra columns:
#   data  - the text after the last "." in the variable name
#   label - the text in front of the first match of ".<data>"
#           (used as a regular expression, as in strsplit)
# Returns the melted data frame with the data and label columns added.
build_statsCI_data <- function(plt_models_df) {
    long_df <- melt(plt_models_df, id.vars="model_id")
    row_ids <- 1:nrow(long_df)
    variable_at <- function(ix) as.character(long_df[ix, "variable"])

    long_df$data <- sapply(row_ids, function(ix) {
        name_parts <- unlist(strsplit(variable_at(ix), "[.]"))
        tail(name_parts, 1)
    })
    long_df$label <- sapply(row_ids, function(ix) {
        suffix_rex <- paste0(".", long_df[ix, "data"])
        head(unlist(strsplit(variable_at(ix), suffix_rex)), 1)
    })

    return(long_df)
}
# Long-format metric values: one row per (model_id, metric) with label / data columns
mltd_models_df <- build_statsCI_data(plt_models_df)

# Long-format CI bounds; each melted variable name is split around "Upper" / "Lower"
#   into label (text before the keyword), data (second "."-separated piece of the
#   text after it) and type (the keyword that matched)
mltdCI_models_df <- melt(pltCI_models_df, id.vars="model_id")
for (row_ix in 1:nrow(mltdCI_models_df)) {
    for (type in c("Upper", "Lower")) {
        # strsplit yields more than one piece only when the variable name contains type
        if (length(var_components <- unlist(strsplit(
                as.character(mltdCI_models_df[row_ix, "variable"]), type))) > 1) {
            #print(sprintf("row_ix:%d; type:%s; ", row_ix, type))
            mltdCI_models_df[row_ix, "label"] <- var_components[1]
            mltdCI_models_df[row_ix, "data"] <- 
                unlist(strsplit(var_components[2], "[.]"))[2]
            mltdCI_models_df[row_ix, "type"] <- type
            break
        }
    }    
}
# Spread the bounds into value.Upper / value.Lower columns, keyed by every column
#   other than type, value & variable
wideCI_models_df <- reshape(subset(mltdCI_models_df, select=-variable), 
                            timevar="type", 
        idvar=setdiff(names(mltdCI_models_df), c("type", "value", "variable")), 
                            direction="wide")
#print(wideCI_models_df)
# Attach the metric values to their bounds (merge on the columns the two frames share)
mrgdCI_models_df <- merge(wideCI_models_df, mltd_models_df, all.x=TRUE)
#print(mrgdCI_models_df)

# Merge stats back in if CIs don't exist
#   i.e. collect every label x data combination that is missing from mltd_models_df
goback_vars <- c()
for (var in unique(mltd_models_df$label)) {
    for (type in unique(mltd_models_df$data)) {
        var_type <- paste0(var, ".", type)
        # if this data is already present, next
        if (var_type %in% unique(paste(mltd_models_df$label, mltd_models_df$data,
                                       sep=".")))
            next
        #print(sprintf("var_type:%s", var_type))
        goback_vars <- c(goback_vars, var_type)
    }
}

# Pull the missing combinations straight from glb_models_df and append them
#   NOTE(review): assumes each goback_vars entry is a column of glb_models_df - confirm
if (length(goback_vars) > 0) {
    mltd_goback_df <- build_statsCI_data(glb_models_df[, c("model_id", goback_vars)])
    mltd_models_df <- rbind(mltd_models_df, mltd_goback_df)
}

# model_method is needed as the colour column of the bar chart below
mltd_models_df <- merge(mltd_models_df, glb_models_df[, c("model_id", "model_method")], 
                        all.x=TRUE)

# Write the bar chart (bars from mltd_models_df, error bars from mrgdCI_models_df,
#   faceted by label x data) to <glb_out_pfx>models_bar.png; gp keeps the plot object
png(paste0(glb_out_pfx, "models_bar.png"), width=480*3, height=480*2)
print(gp <- myplot_bar(mltd_models_df, "model_id", "value", colorcol_name="model_method") + 
        geom_errorbar(data=mrgdCI_models_df, 
            mapping=aes(x=model_id, ymax=value.Upper, ymin=value.Lower), width=0.5) + 
          facet_grid(label ~ data, scales="free") + 
          theme(axis.text.x = element_text(angle = 90,vjust = 0.5)))
dev.off()
## quartz_off_screen 
##                 2
# Print the plot object gp again, this time on the current graphics device
print(gp)

# used for console inspection
# Builds the one-sided ordering formula used to rank models:
#   every metric in glb_model_evl_criteria whose name contains "max" is prefixed
#   with "-", every other metric with "+"; for binomial classification
#   "- opt.prob.threshold.OOB" is appended as the last term.
# Returns a formula such as ~ -max.Accuracy.OOB + min.aic.fit
get_model_sel_frmla <- function() {
    signed_terms <- unlist(lapply(glb_model_evl_criteria, function(metric)
        c(if (length(grep("max", metric)) > 0) "-" else "+", metric)))

    if (glb_is_classification && glb_is_binomial)
        signed_terms <- c(signed_terms, "-", "opt.prob.threshold.OOB")

    frmla_txt <- paste(c("~ ", signed_terms), collapse=" ")
    return(as.formula(frmla_txt))
}

# Rank the models with the selection formula and keep only the columns worth showing:
#   model_id, the evaluation criteria and (binomial classification only) the
#   OOB probability threshold
dsp_models_cols <- c("model_id", glb_model_evl_criteria)
if (glb_is_classification && glb_is_binomial) {
    dsp_models_cols <- c(dsp_models_cols, "opt.prob.threshold.OOB")
}
model_sel_frmla <- get_model_sel_frmla()
dsp_models_df <- orderBy(model_sel_frmla, glb_models_df)[, dsp_models_cols]
print(dsp_models_df)
##                         model_id max.Accuracy.OOB max.auc.OOB
## 18    All.Interact.X.no.rnorm.rf        0.8483146   0.9142644
## 13             All.X.no.rnorm.rf        0.8438202   0.9180131
## 5                Max.cor.Y.rpart        0.8426966   0.8469855
## 12          All.X.no.rnorm.rpart        0.8426966   0.8469855
## 17 All.Interact.X.no.rnorm.rpart        0.8426966   0.8469855
## 16         All.Interact.X.glmnet        0.8359551   0.8742088
## 6                  Max.cor.Y.glm        0.8348315   0.8659702
## 11                  All.X.glmnet        0.8325843   0.8560007
## 4      Max.cor.Y.cv.0.cp.0.rpart        0.8202247   0.8997924
## 15       All.Interact.X.bayesglm        0.8179775   0.8660362
## 7        Interact.High.cor.Y.glm        0.8146067   0.8576352
## 10                All.X.bayesglm        0.7842697   0.8427064
## 8                  Low.cor.X.glm        0.7786517   0.8382546
## 9                      All.X.glm        0.7741573   0.8308232
## 14            All.Interact.X.glm        0.6797753   0.6856640
## 1              MFO.myMFO_classfr        0.5359551   0.5000000
## 3           Max.cor.Y.cv.0.rpart        0.5359551   0.5000000
## 2        Random.myrandom_classfr        0.4640449   0.5185354
##    max.Kappa.OOB min.aic.fit opt.prob.threshold.OOB
## 18     0.6930078          NA                    0.6
## 13     0.6854548          NA                    0.5
## 5      0.6791719          NA                    0.9
## 12     0.6791719          NA                    0.9
## 17     0.6791719          NA                    0.9
## 16     0.6661923          NA                    0.6
## 6      0.6639612    883.4623                    0.7
## 11     0.6580401          NA                    0.7
## 4      0.6403332          NA                    0.3
## 15     0.6319103   1164.3831                    0.5
## 7      0.6240496    887.8417                    0.6
## 10     0.5654496   1056.6761                    0.5
## 8      0.5546405    914.1270                    0.5
## 9      0.5454499    931.5575                    0.5
## 14     0.3658021  14993.8106                    0.9
## 1      0.0000000          NA                    0.5
## 3      0.0000000          NA                    0.5
## 2      0.0000000          NA                    0.4
# Radar chart restricted to the columns of dsp_models_df (the ranked display frame)
print(myplot_radar(radar_inp_df=dsp_models_df))
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 18. Consider specifying shapes manually if you must have them.
## Warning: Removed 75 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_text).
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 18. Consider specifying shapes manually if you must have them.

# Show the ordering formula that was used to rank the models
print("Metrics used for model selection:"); print(model_sel_frmla)
## [1] "Metrics used for model selection:"
## ~-max.Accuracy.OOB - max.auc.OOB - max.Kappa.OOB + min.aic.fit - 
##     opt.prob.threshold.OOB
## <environment: 0x7fa4be8b28e8>
# The first row of the ranked dsp_models_df is reported as the best model
print(sprintf("Best model id: %s", dsp_models_df[1, "model_id"]))
## [1] "Best model id: All.Interact.X.no.rnorm.rf"
# Add the predictions of one fitted model to an observations data frame.
#
# Args:
#   df:          data frame passed as newdata to predict(); for regression and
#                binomial classification it must also contain the glb_rsp_var column
#   mdl_id:      name of the model in glb_models_lst (model_id in glb_models_df)
#   rsp_var_out: prefix of the new column names; mdl_id is appended to it
#   prob_threshold_def: probability threshold to fall back on (binomial
#                classification only) when glb_models_df has no usable
#                opt.prob.threshold.OOB for mdl_id
#
# Returns df with these columns added (<out> = rsp_var_out + mdl_id):
#   regression:       <out> and <out>.err (absolute error); also prints a scatter
#                     plot and the observations with the largest errors
#   binomial classf.: <out>.prob (second column of the class probabilities) and
#                     <out> (second response level when prob >= threshold)
#   other classf.:    <out> (raw prediction) and <out>.prob
#
# Stops if a threshold is needed but neither glb_models_df nor prob_threshold_def
# supplies one.
glb_get_predictions <- function(df, mdl_id, rsp_var_out, prob_threshold_def=NULL) {
    mdl <- glb_models_lst[[mdl_id]]
    rsp_var_out <- paste0(rsp_var_out, mdl_id)

    if (glb_is_regression) {
        df[, rsp_var_out] <- predict(mdl, newdata=df, type="raw")
        print(myplot_scatter(df, glb_rsp_var, rsp_var_out, smooth=TRUE))
        df[, paste0(rsp_var_out, ".err")] <- 
            abs(df[, rsp_var_out] - df[, glb_rsp_var])
        print(head(orderBy(reformulate(c("-", paste0(rsp_var_out, ".err"))), 
                           df)))                             
    }

    if (glb_is_classification && glb_is_binomial) {
        prob_threshold <- glb_models_df[glb_models_df$model_id == mdl_id, 
                                        "opt.prob.threshold.OOB"]
        # The lookup is zero-length (not NULL / NA) when mdl_id has no row in
        #   glb_models_df; test the length first so that case also falls back to
        #   the default instead of breaking the condition
        if (length(prob_threshold) == 0 || is.na(prob_threshold)) {
            warning("Using default probability threshold: ", prob_threshold_def)
            if (is.null(prob_threshold <- prob_threshold_def))
                stop("Default probability threshold is NULL")
        }
        
        df[, paste0(rsp_var_out, ".prob")] <- 
            predict(mdl, newdata=df, type="prob")[, 2]
        # (prob >= threshold) * 1 + 1 maps FALSE / TRUE to response level 1 / 2
        df[, rsp_var_out] <- 
                factor(levels(df[, glb_rsp_var])[
                    (df[, paste0(rsp_var_out, ".prob")] >=
                        prob_threshold) * 1 + 1], levels(df[, glb_rsp_var]))
    
        # prediction stats already reported by myfit_mdl ???
    }    
    
    if (glb_is_classification && !glb_is_binomial) {
        df[, rsp_var_out] <- predict(mdl, newdata=df, type="raw")
        df[, paste0(rsp_var_out, ".prob")] <- 
            predict(mdl, newdata=df, type="prob")
    }

    return(df)
}    

# Copy the global working objects into sav_* globals (via <<-) so that they can
# be restored later, e.g. while debugging interactively.
#   The five observation data frames are always copied.
#   glb_models_lst, glb_models_df & glb_feats_df are copied when they are not NULL.
#   glb_featsimp_df is copied only when it exists and is not NULL; it is
#   assigned after this function is defined, so it may not exist yet at call time.
# Takes no arguments; called for its side effects only.
glb_to_sav <- function() {
    sav_allobs_df <<- glb_allobs_df 
    sav_trnobs_df <<- glb_trnobs_df
    sav_fitobs_df <<- glb_fitobs_df
    sav_OOBobs_df <<- glb_OOBobs_df
    sav_newobs_df <<- glb_newobs_df
    if (!is.null(glb_models_lst )) sav_models_lst  <<- glb_models_lst
    if (!is.null(glb_models_df  )) sav_models_df   <<- glb_models_df
    if (!is.null(glb_feats_df   )) sav_feats_df    <<- glb_feats_df
    # exists() searches the enclosing environments; the previous check grepped a
    #   misspelled name in ls(), which lists only this function's (empty) frame,
    #   so the feature importances were never saved
    if (exists("glb_featsimp_df") &&
        !is.null(glb_featsimp_df)) sav_featsimp_df <<- glb_featsimp_df    
}
#stop(here"); glb_to_sav(); glb_allobs_df <- sav_allobs_df; glb_trnobs_df <- sav_trnobs_df; glb_fitobs_df <- sav_fitobs_df; glb_OOBobs_df <- sav_OOBobs_df; sav_models_df <- glb_models_df; glb_models_df <- sav_models_df; glb_featsimp_df <- sav_featsimp_df    

# Build the stacked "Ensemble.glmnet" model when the user selected it:
#   1. rank the models with get_model_sel_frmla()
#   2. add the predictions of every model ranked above the first MFO / Baseline
#      model to glb_fitobs_df & glb_OOBobs_df
#   3. fit a glmnet model on those prediction columns
#      (probabilities only, for binomial classification)
# && (instead of the vectorised &) short-circuits, so a NULL glb_sel_mdl_id
#   simply skips this chunk instead of aborting on a zero-length condition
if (!is.null(glb_sel_mdl_id) && (glb_sel_mdl_id == "Ensemble.glmnet")) {
    if (#(glb_is_regression) | 
        ((glb_is_classification) && (!glb_is_binomial)))
        stop("Ensemble models not implemented yet for multinomial classification")
    
    tmp_models_df <- orderBy(get_model_sel_frmla(), glb_models_df)
    row.names(tmp_models_df) <- tmp_models_df$model_id
#     mdl_threshold_pos <- min(which(tmp_models_df$model_id %in% 
#                                 c("MFO.myMFO_classfr", "Baseline.mybaseln_classfr"))) - 1
    # Position of the last model that ranks above the first MFO / Baseline model
    mdl_threshold_pos <- min(which(grepl("MFO|Baseline", tmp_models_df$model_id))) - 1
    for (model_id in tmp_models_df$model_id[1:mdl_threshold_pos]) {
        glb_fitobs_df <- glb_get_predictions(df=glb_fitobs_df, model_id, glb_rsp_var_out)
        glb_OOBobs_df <- glb_get_predictions(df=glb_OOBobs_df, model_id, glb_rsp_var_out)
    }
    
    model_id <- "Ensemble"; method <- "glmnet";
    #rex_str <- paste0(gsub(".", "\\.", glb_rsp_var_out, fixed=TRUE), "(.+)(?!(prob))")    
    #tmp_names <- tail(names(glb_fitobs_df), 5)
    #match_pos <- gregexpr(rex_str, tmp_names, perl=TRUE)
    # Ensemble features = the prediction columns added above:
    #   the ".prob" columns for binomial classification, otherwise every
    #   prediction column except the ".err" ones
    indep_vars_vctr <- grep(glb_rsp_var_out, names(glb_fitobs_df), fixed=TRUE, value=TRUE)
    if (glb_is_classification && glb_is_binomial)
        indep_vars_vctr <- grep("prob$", indep_vars_vctr, value=TRUE)
    else
        indep_vars_vctr <- indep_vars_vctr[!grepl("err$", indep_vars_vctr)]
    ret_lst <- myfit_mdl(model_id=model_id, model_method=method,
                         indep_vars_vctr=indep_vars_vctr,
                         model_type=glb_model_type,
                         rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out,
                         fit_df=glb_fitobs_df, OOB_df=glb_OOBobs_df,
                         n_cv_folds=glb_n_cv_folds, tune_models_df=glb_tune_models_df)
}
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## [1] "fitting model: Ensemble.glmnet"
## [1] "    indep_vars: sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob, sold.fctr.predict.All.X.no.rnorm.rf.prob, sold.fctr.predict.Max.cor.Y.rpart.prob, sold.fctr.predict.All.X.no.rnorm.rpart.prob, sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob, sold.fctr.predict.All.Interact.X.glmnet.prob, sold.fctr.predict.Max.cor.Y.glm.prob, sold.fctr.predict.All.X.glmnet.prob, sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob, sold.fctr.predict.All.Interact.X.bayesglm.prob, sold.fctr.predict.Interact.High.cor.Y.glm.prob, sold.fctr.predict.All.X.bayesglm.prob, sold.fctr.predict.Low.cor.X.glm.prob, sold.fctr.predict.All.X.glm.prob, sold.fctr.predict.All.Interact.X.glm.prob"
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.55, lambda = 0.0972 on full training set
## Warning in myfit_mdl(model_id = model_id, model_method = method,
## indep_vars_vctr = indep_vars_vctr, : model's bestTune found at an extreme
## of tuneGrid for parameter: lambda

##             Length Class      Mode     
## a0            97   -none-     numeric  
## beta        1455   dgCMatrix  S4       
## df            97   -none-     numeric  
## dim            2   -none-     numeric  
## lambda        97   -none-     numeric  
## dev.ratio     97   -none-     numeric  
## nulldev        1   -none-     numeric  
## npasses        1   -none-     numeric  
## jerr           1   -none-     numeric  
## offset         1   -none-     logical  
## classnames     2   -none-     character
## call           5   -none-     call     
## nobs           1   -none-     numeric  
## lambdaOpt      1   -none-     numeric  
## xNames        15   -none-     character
## problemType    1   -none-     character
## tuneValue      2   data.frame list     
## obsLevels      2   -none-     character
## [1] "min lambda > lambdaOpt:"
##                                       (Intercept) 
##                                        -2.5446998 
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob 
##                                         2.3378403 
##          sold.fctr.predict.All.X.no.rnorm.rf.prob 
##                                         2.3819269 
##  sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob 
##                                         0.1783523 
##    sold.fctr.predict.All.Interact.X.bayesglm.prob 
##                                         0.1199788 
## [1] "max lambda < lambdaOpt:"
##                                          (Intercept) 
##                                          -9.90797572 
##    sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob 
##                                          11.78238766 
##             sold.fctr.predict.All.X.no.rnorm.rf.prob 
##                                          12.45455190 
##               sold.fctr.predict.Max.cor.Y.rpart.prob 
##                                          -0.80850918 
##          sold.fctr.predict.All.X.no.rnorm.rpart.prob 
##                                          -0.81784589 
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob 
##                                          -0.80275250 
##         sold.fctr.predict.All.Interact.X.glmnet.prob 
##                                          -1.23144839 
##                  sold.fctr.predict.All.X.glmnet.prob 
##                                          -0.46608208 
##     sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob 
##                                          -0.03955328 
##       sold.fctr.predict.All.Interact.X.bayesglm.prob 
##                                          -0.08909699 
##                sold.fctr.predict.All.X.bayesglm.prob 
##                                          -0.30426435 
##                 sold.fctr.predict.Low.cor.X.glm.prob 
##                                          -0.06816013 
## character(0)
## character(0)
## [1] "    calling mypredict_mdl for fit:"

##    threshold   f.score
## 1        0.0 0.6313559
## 2        0.1 0.8216912
## 3        0.2 0.9664865
## 4        0.3 0.9845815
## 5        0.4 1.0000000
## 6        0.5 1.0000000
## 7        0.6 1.0000000
## 8        0.7 0.9735936
## 9        0.8 0.9044118
## 10       0.9 0.7685950
## 11       1.0 0.0000000

## [1] "Classifier Probability Threshold: 0.6000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.Ensemble.glmnet.N
## 1         N                                 522
## 2         Y                                  NA
##   sold.fctr.predict.Ensemble.glmnet.Y
## 1                                  NA
## 2                                 447
##          Prediction
## Reference   N   Y
##         N 522   0
##         Y   0 447
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   1.000000e+00   1.000000e+00   9.962003e-01   1.000000e+00   5.386997e-01 
## AccuracyPValue  McnemarPValue 
##  4.731267e-261            NaN 
## [1] "    calling mypredict_mdl for OOB:"

##    threshold   f.score
## 1        0.0 0.6339217
## 2        0.1 0.7431694
## 3        0.2 0.7945205
## 4        0.3 0.8103837
## 5        0.4 0.8217237
## 6        0.5 0.8314883
## 7        0.6 0.8259212
## 8        0.7 0.8211921
## 9        0.8 0.7859155
## 10       0.9 0.6473430
## 11       1.0 0.0000000

## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.OOB"
##   sold.fctr sold.fctr.predict.Ensemble.glmnet.N
## 1         N                                 415
## 2         Y                                  75
##   sold.fctr.predict.Ensemble.glmnet.Y
## 1                                  62
## 2                                 338
##          Prediction
## Reference   N   Y
##         N 415  62
##         Y  75 338
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.460674e-01   6.898797e-01   8.206560e-01   8.691620e-01   5.359551e-01 
## AccuracyPValue  McnemarPValue 
##   1.053824e-85   3.052551e-01 
##          model_id model_method
## 1 Ensemble.glmnet       glmnet
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         feats
## 1 sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob, sold.fctr.predict.All.X.no.rnorm.rf.prob, sold.fctr.predict.Max.cor.Y.rpart.prob, sold.fctr.predict.All.X.no.rnorm.rpart.prob, sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob, sold.fctr.predict.All.Interact.X.glmnet.prob, sold.fctr.predict.Max.cor.Y.glm.prob, sold.fctr.predict.All.X.glmnet.prob, sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob, sold.fctr.predict.All.Interact.X.bayesglm.prob, sold.fctr.predict.Interact.High.cor.Y.glm.prob, sold.fctr.predict.All.X.bayesglm.prob, sold.fctr.predict.Low.cor.X.glm.prob, sold.fctr.predict.All.X.glm.prob, sold.fctr.predict.All.Interact.X.glm.prob
##   max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1               9                      1.457                 0.042
##   max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1           1                    0.6               1                1
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1             0.9962003                     1             1   0.9183253
##   opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1                    0.5       0.8314883        0.8460674
##   max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1              0.820656              0.869162     0.6898797
##   max.AccuracySD.fit max.KappaSD.fit
## 1                  0               0
# Keep a user-specified model selection; otherwise default to the top-ranked model
if (!is.null(glb_sel_mdl_id)) {
    print(sprintf("User specified selection: %s", glb_sel_mdl_id))
} else {
    glb_sel_mdl_id <- dsp_models_df[1, "model_id"]
}
## [1] "User specified selection: Ensemble.glmnet"
# Fetch the selected model into glb_sel_mdl and print it with myprint_mdl
myprint_mdl(glb_sel_mdl <- glb_models_lst[[glb_sel_mdl_id]])

##             Length Class      Mode     
## a0            97   -none-     numeric  
## beta        1455   dgCMatrix  S4       
## df            97   -none-     numeric  
## dim            2   -none-     numeric  
## lambda        97   -none-     numeric  
## dev.ratio     97   -none-     numeric  
## nulldev        1   -none-     numeric  
## npasses        1   -none-     numeric  
## jerr           1   -none-     numeric  
## offset         1   -none-     logical  
## classnames     2   -none-     character
## call           5   -none-     call     
## nobs           1   -none-     numeric  
## lambdaOpt      1   -none-     numeric  
## xNames        15   -none-     character
## problemType    1   -none-     character
## tuneValue      2   data.frame list     
## obsLevels      2   -none-     character
## [1] "min lambda > lambdaOpt:"
##                                       (Intercept) 
##                                        -2.5446998 
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob 
##                                         2.3378403 
##          sold.fctr.predict.All.X.no.rnorm.rf.prob 
##                                         2.3819269 
##  sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob 
##                                         0.1783523 
##    sold.fctr.predict.All.Interact.X.bayesglm.prob 
##                                         0.1199788 
## [1] "max lambda < lambdaOpt:"
##                                          (Intercept) 
##                                          -9.90797572 
##    sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob 
##                                          11.78238766 
##             sold.fctr.predict.All.X.no.rnorm.rf.prob 
##                                          12.45455190 
##               sold.fctr.predict.Max.cor.Y.rpart.prob 
##                                          -0.80850918 
##          sold.fctr.predict.All.X.no.rnorm.rpart.prob 
##                                          -0.81784589 
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob 
##                                          -0.80275250 
##         sold.fctr.predict.All.Interact.X.glmnet.prob 
##                                          -1.23144839 
##                  sold.fctr.predict.All.X.glmnet.prob 
##                                          -0.46608208 
##     sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob 
##                                          -0.03955328 
##       sold.fctr.predict.All.Interact.X.bayesglm.prob 
##                                          -0.08909699 
##                sold.fctr.predict.All.X.bayesglm.prob 
##                                          -0.30426435 
##                 sold.fctr.predict.Low.cor.X.glm.prob 
##                                          -0.06816013 
## character(0)
## character(0)
## [1] TRUE
# From here to save(), this should all be in one function
#   these are executed in the same seq twice more:
#       fit.data.training & predict.data.new chunks
# Add the selected model's predictions to the OOB observations
glb_OOBobs_df <- glb_get_predictions(df=glb_OOBobs_df, mdl_id=glb_sel_mdl_id, 
                                     rsp_var_out=glb_rsp_var_out)
# Column names for the per-observation accuracy flag & error
#   (predct_error_var_name is only defined here, not used in this chunk)
predct_accurate_var_name <- paste0(glb_rsp_var_out, glb_sel_mdl_id, ".accurate")
predct_error_var_name <- paste0(glb_rsp_var_out, glb_sel_mdl_id, ".err")
# TRUE where the predicted value equals the actual response
glb_OOBobs_df[, predct_accurate_var_name] <-
                    (glb_OOBobs_df[, glb_rsp_var] == 
                     glb_OOBobs_df[, paste0(glb_rsp_var_out, glb_sel_mdl_id)])

# Feature importance of the selected model; the importance column is duplicated
#   under a "<model id>.importance" name
glb_featsimp_df <- 
    myget_feats_importance(mdl=glb_sel_mdl, featsimp_df=NULL)
glb_featsimp_df[, paste0(glb_sel_mdl_id, ".importance")] <- glb_featsimp_df$importance
print(glb_featsimp_df)
##                                                      importance
## sold.fctr.predict.All.X.no.rnorm.rf.prob             100.000000
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob     98.138116
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob       7.031892
## sold.fctr.predict.All.Interact.X.bayesglm.prob         4.587832
## sold.fctr.predict.Max.cor.Y.rpart.prob                 0.000000
## sold.fctr.predict.All.X.no.rnorm.rpart.prob            0.000000
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob   0.000000
## sold.fctr.predict.All.Interact.X.glmnet.prob           0.000000
## sold.fctr.predict.Max.cor.Y.glm.prob                   0.000000
## sold.fctr.predict.All.X.glmnet.prob                    0.000000
## sold.fctr.predict.Interact.High.cor.Y.glm.prob         0.000000
## sold.fctr.predict.All.X.bayesglm.prob                  0.000000
## sold.fctr.predict.Low.cor.X.glm.prob                   0.000000
## sold.fctr.predict.All.X.glm.prob                       0.000000
## sold.fctr.predict.All.Interact.X.glm.prob              0.000000
##                                                      Ensemble.glmnet.importance
## sold.fctr.predict.All.X.no.rnorm.rf.prob                             100.000000
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob                     98.138116
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob                       7.031892
## sold.fctr.predict.All.Interact.X.bayesglm.prob                         4.587832
## sold.fctr.predict.Max.cor.Y.rpart.prob                                 0.000000
## sold.fctr.predict.All.X.no.rnorm.rpart.prob                            0.000000
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob                   0.000000
## sold.fctr.predict.All.Interact.X.glmnet.prob                           0.000000
## sold.fctr.predict.Max.cor.Y.glm.prob                                   0.000000
## sold.fctr.predict.All.X.glmnet.prob                                    0.000000
## sold.fctr.predict.Interact.High.cor.Y.glm.prob                         0.000000
## sold.fctr.predict.All.X.bayesglm.prob                                  0.000000
## sold.fctr.predict.Low.cor.X.glm.prob                                   0.000000
## sold.fctr.predict.All.X.glm.prob                                       0.000000
## sold.fctr.predict.All.Interact.X.glm.prob                              0.000000
# Used again in fit.data.training & predict.data.new chunks
#
# Prints diagnostic plots for the predictions of model `mdl_id` on `obs_df`:
#   1. one jittered scatter plot per important feature (at most 5, with a
#      warning when more are available) showing the actual response
#      (glb_rsp_var) and the predicted response against that feature;
#   2. a prediction plot via myplot_prediction_regression() when
#      glb_is_regression, and via myplot_prediction_classification() when
#      glb_is_classification.
#
# Args:
#   obs_df:         data frame of observations; must contain glb_rsp_var, the
#                   prediction column paste0(glb_rsp_var_out, mdl_id) and every
#                   feature that gets plotted.
#   mdl_id:         model id; used only to build the prediction column name.
#   prob_threshold: passed through to myplot_prediction_classification();
#                   defaults to NULL.
#
# Feature importances are always read from the global glb_featsimp_df; they are
#   not looked up by mdl_id.
# Called for its printed plots / output.
glb_analytics_diag_plots <- function(obs_df, mdl_id, prob_threshold=NULL) {
    # Map a model term to the data column it derives from by dropping the
    #   factor-level suffix that follows ".fctr" (e.g. "x.fctrA" -> "x.fctr").
    # Names that already are columns of obs_df are kept untouched: otherwise a
    #   column such as "sold.fctr.predict.<model>.prob" (the features of an
    #   ensemble model) would be truncated to "sold.fctr", i.e. the response.
    strip_fctr_level <- function(term_names) {
        stripped <- gsub("(.*?)\\.fctr(.*)", "\\1\\.fctr", term_names)
        is_obs_col <- !is.na(term_names) & (term_names %in% names(obs_df))
        stripped[is_obs_col] <- term_names[is_obs_col]
        stripped
    }

    featsimp_df <- glb_featsimp_df
    # Row names are model terms: drop back-ticks, then split "a:b" interaction
    #   terms into feat ("a") and feat.interact ("b"; NA when no interaction)
    featsimp_df$feat <- gsub("`(.*?)`", "\\1", row.names(featsimp_df))
    featsimp_df$feat.interact <- gsub("(.*?):(.*)", "\\2", featsimp_df$feat)
    featsimp_df$feat <- gsub("(.*?):(.*)", "\\1", featsimp_df$feat)
    featsimp_df$feat.interact <- ifelse(featsimp_df$feat.interact == featsimp_df$feat,
                                        NA, featsimp_df$feat.interact)
    featsimp_df$feat <- strip_fctr_level(featsimp_df$feat)
    featsimp_df$feat.interact <- strip_fctr_level(featsimp_df$feat.interact)

    # One row per (feat, feat.interact) with its maximum importance, sorted by
    #   decreasing importance
    featsimp_df <- orderBy(~ -importance.max, summaryBy(importance ~ feat + feat.interact,
                                                        data=featsimp_df, FUN=max))
    #rex_str=":(.*)"; txt_vctr=tail(featsimp_df$feat); ret_lst <- regexec(rex_str, txt_vctr); ret_lst <- regmatches(txt_vctr, ret_lst); ret_vctr <- sapply(1:length(ret_lst), function(pos_ix) ifelse(length(ret_lst[[pos_ix]]) > 0, ret_lst[[pos_ix]], "")); print(ret_vctr <- ret_vctr[ret_vctr != ""])
    if (nrow(featsimp_df) > 5) {
        warning("Limiting important feature scatter plots to 5 out of ", nrow(featsimp_df))
        featsimp_df <- head(featsimp_df, 5)
    }

#     if (!all(is.na(featsimp_df$feat.interact)))
#         stop("not implemented yet")
    rsp_var_out <- paste0(glb_rsp_var_out, mdl_id)

    # Actual vs. predicted response against each retained feature
    for (feat_name in featsimp_df$feat) {
        plot_df <- melt(obs_df, id.vars=feat_name,
                        measure.vars=c(glb_rsp_var, rsp_var_out))

#         if (feat_name == "<feat_name>") print(myplot_scatter(plot_df, feat_name, "value",
#                                              facet_colcol_name="variable") +
#                       geom_vline(xintercept=<divider_val>, linetype="dotted")) else
        print(myplot_scatter(plot_df, feat_name, "value", colorcol_name="variable",
                             facet_colcol_name="variable", jitter=TRUE) +
                  guides(color=FALSE))
    }

    # Prediction plots use the most important feature on the y axis and the
    #   second most important one on the x axis; with a single feature the x
    #   axis falls back to ".rownames"
    feat_x <- if (nrow(featsimp_df) > 1) featsimp_df$feat[2] else ".rownames"

    if (glb_is_regression) {
        if (nrow(featsimp_df) == 0) {
            warning("No important features in glb_fin_mdl")
        } else {
            print(myplot_prediction_regression(df=obs_df,
                        feat_x=feat_x,
                        feat_y=featsimp_df$feat[1],
                        rsp_var=glb_rsp_var, rsp_var_out=rsp_var_out,
                        id_vars=glb_id_var)
    #               + facet_wrap(reformulate(featsimp_df$feat[2])) # if [1 or 2] is a factor
    #               + geom_point(aes_string(color="<col_name>.fctr")) #  to color the plot
                  )
        }
    }

    if (glb_is_classification) {
        if (nrow(featsimp_df) == 0) {
            warning("No features in selected model are statistically important")
        } else {
            print(myplot_prediction_classification(df=obs_df,
                        feat_x=feat_x,
                        feat_y=featsimp_df$feat[1],
                        rsp_var=glb_rsp_var,
                        rsp_var_out=rsp_var_out,
                        id_vars=glb_id_var,
                        prob_threshold=prob_threshold)
#               + geom_hline(yintercept=<divider_val>, linetype = "dotted")
                  )
        }
    }
}

# Diagnostic plots for the selected model on the OOB observations. Only a
#   binomial classification passes a probability threshold, taken from the
#   selected model's opt.prob.threshold.OOB entry in glb_models_df; every other
#   case relies on the function's default (prob_threshold=NULL).
if (!(glb_is_classification && glb_is_binomial)) {
    glb_analytics_diag_plots(obs_df=glb_OOBobs_df, mdl_id=glb_sel_mdl_id)
} else {
    glb_analytics_diag_plots(
        obs_df=glb_OOBobs_df,
        mdl_id=glb_sel_mdl_id,
        prob_threshold=glb_models_df[glb_models_df$model_id == glb_sel_mdl_id,
                                     "opt.prob.threshold.OOB"])
}

## [1] "Min/Max Boundaries: "
##      UniqueID sold.fctr sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob
## 5       10005         N                                              0.17
## 1859    11861         N                                              0.34
##      sold.fctr.predict.All.Interact.X.no.rnorm.rf
## 5                                               N
## 1859                                            N
##      sold.fctr.predict.All.X.no.rnorm.rf.prob
## 5                                       0.222
## 1859                                    0.260
##      sold.fctr.predict.All.X.no.rnorm.rf
## 5                                      N
## 1859                                   N
##      sold.fctr.predict.Max.cor.Y.rpart.prob
## 5                                 0.2115028
## 1859                              0.2115028
##      sold.fctr.predict.Max.cor.Y.rpart
## 5                                    N
## 1859                                 N
##      sold.fctr.predict.All.X.no.rnorm.rpart.prob
## 5                                      0.2115028
## 1859                                   0.2115028
##      sold.fctr.predict.All.X.no.rnorm.rpart
## 5                                         N
## 1859                                      N
##      sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob
## 5                                               0.2115028
## 1859                                            0.2115028
##      sold.fctr.predict.All.Interact.X.no.rnorm.rpart
## 5                                                  N
## 1859                                               N
##      sold.fctr.predict.All.Interact.X.glmnet.prob
## 5                                       0.2589963
## 1859                                    0.2752168
##      sold.fctr.predict.All.Interact.X.glmnet
## 5                                          N
## 1859                                       N
##      sold.fctr.predict.Max.cor.Y.glm.prob sold.fctr.predict.Max.cor.Y.glm
## 5                               0.4107302                               N
## 1859                            0.2427831                               N
##      sold.fctr.predict.All.X.glmnet.prob sold.fctr.predict.All.X.glmnet
## 5                              0.2903361                              N
## 1859                           0.2841722                              N
##      sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob
## 5                                           0.1015625
## 1859                                        0.7777778
##      sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart
## 5                                              N
## 1859                                           Y
##      sold.fctr.predict.All.Interact.X.bayesglm.prob
## 5                                       0.009594626
## 1859                                    0.772431246
##      sold.fctr.predict.All.Interact.X.bayesglm
## 5                                            N
## 1859                                         Y
##      sold.fctr.predict.Interact.High.cor.Y.glm.prob
## 5                                         0.4075147
## 1859                                      0.2427705
##      sold.fctr.predict.Interact.High.cor.Y.glm
## 5                                            N
## 1859                                         N
##      sold.fctr.predict.All.X.bayesglm.prob
## 5                               0.06255101
## 1859                            0.91073392
##      sold.fctr.predict.All.X.bayesglm sold.fctr.predict.Low.cor.X.glm.prob
## 5                                   N                           0.01473314
## 1859                                Y                           0.99999996
##      sold.fctr.predict.Low.cor.X.glm sold.fctr.predict.All.X.glm.prob
## 5                                  N                       0.04705504
## 1859                               Y                       0.99999997
##      sold.fctr.predict.All.X.glm sold.fctr.predict.All.Interact.X.glm.prob
## 5                              N                              2.220446e-16
## 1859                           Y                              1.000000e+00
##      sold.fctr.predict.All.Interact.X.glm
## 5                                       N
## 1859                                    Y
##      sold.fctr.predict.Ensemble.glmnet.prob
## 5                                 0.1626605
## 1859                              0.2826691
##      sold.fctr.predict.Ensemble.glmnet
## 5                                    N
## 1859                                 N
##      sold.fctr.predict.Ensemble.glmnet.accurate
## 5                                          TRUE
## 1859                                       TRUE
##      sold.fctr.predict.Ensemble.glmnet.error .label
## 5                                          0  10005
## 1859                                       0  11861
## [1] "Inaccurate: "
##      UniqueID sold.fctr sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob
## 1447    11448         Y                                             0.000
## 991     10991         Y                                             0.002
## 1582    11583         Y                                             0.008
## 834     10834         Y                                             0.012
## 962     10962         Y                                             0.040
## 1589    11590         Y                                             0.080
##      sold.fctr.predict.All.Interact.X.no.rnorm.rf
## 1447                                            N
## 991                                             N
## 1582                                            N
## 834                                             N
## 962                                             N
## 1589                                            N
##      sold.fctr.predict.All.X.no.rnorm.rf.prob
## 1447                                    0.000
## 991                                     0.016
## 1582                                    0.030
## 834                                     0.112
## 962                                     0.110
## 1589                                    0.088
##      sold.fctr.predict.All.X.no.rnorm.rf
## 1447                                   N
## 991                                    N
## 1582                                   N
## 834                                    N
## 962                                    N
## 1589                                   N
##      sold.fctr.predict.Max.cor.Y.rpart.prob
## 1447                              0.2115028
## 991                               0.2115028
## 1582                              0.2115028
## 834                               0.2115028
## 962                               0.2115028
## 1589                              0.2115028
##      sold.fctr.predict.Max.cor.Y.rpart
## 1447                                 N
## 991                                  N
## 1582                                 N
## 834                                  N
## 962                                  N
## 1589                                 N
##      sold.fctr.predict.All.X.no.rnorm.rpart.prob
## 1447                                   0.2115028
## 991                                    0.2115028
## 1582                                   0.2115028
## 834                                    0.2115028
## 962                                    0.2115028
## 1589                                   0.2115028
##      sold.fctr.predict.All.X.no.rnorm.rpart
## 1447                                      N
## 991                                       N
## 1582                                      N
## 834                                       N
## 962                                       N
## 1589                                      N
##      sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob
## 1447                                            0.2115028
## 991                                             0.2115028
## 1582                                            0.2115028
## 834                                             0.2115028
## 962                                             0.2115028
## 1589                                            0.2115028
##      sold.fctr.predict.All.Interact.X.no.rnorm.rpart
## 1447                                               N
## 991                                                N
## 1582                                               N
## 834                                                N
## 962                                                N
## 1589                                               N
##      sold.fctr.predict.All.Interact.X.glmnet.prob
## 1447                                    0.3463592
## 991                                     0.2592719
## 1582                                    0.3293101
## 834                                     0.2557878
## 962                                     0.3138472
## 1589                                    0.2401187
##      sold.fctr.predict.All.Interact.X.glmnet
## 1447                                       N
## 991                                        N
## 1582                                       N
## 834                                        N
## 962                                        N
## 1589                                       N
##      sold.fctr.predict.Max.cor.Y.glm.prob sold.fctr.predict.Max.cor.Y.glm
## 1447                            0.5774744                               N
## 991                             0.1375672                               N
## 1582                            0.4999143                               N
## 834                             0.1289347                               N
## 962                             0.4277115                               N
## 1589                            0.1408025                               N
##      sold.fctr.predict.All.X.glmnet.prob sold.fctr.predict.All.X.glmnet
## 1447                           0.4342911                              N
## 991                            0.2403315                              N
## 1582                           0.3973766                              N
## 834                            0.2337584                              N
## 962                            0.3691777                              N
## 1589                           0.2186695                              N
##      sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob
## 1447                                       0.10156250
## 991                                        0.04191617
## 1582                                       0.10156250
## 834                                        0.04191617
## 962                                        0.10156250
## 1589                                       0.04191617
##      sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart
## 1447                                           N
## 991                                            N
## 1582                                           N
## 834                                            N
## 962                                            N
## 1589                                           N
##      sold.fctr.predict.All.Interact.X.bayesglm.prob
## 1447                                      0.1818562
## 991                                       0.1584976
## 1582                                      0.1505292
## 834                                       0.2988306
## 962                                       0.1508337
## 1589                                      0.1763513
##      sold.fctr.predict.All.Interact.X.bayesglm
## 1447                                         N
## 991                                          N
## 1582                                         N
## 834                                          N
## 962                                          N
## 1589                                         N
##      sold.fctr.predict.Interact.High.cor.Y.glm.prob
## 1447                                      0.5714177
## 991                                       0.1389766
## 1582                                      0.4950528
## 834                                       0.1304135
## 962                                       0.4241684
## 1589                                      0.1421834
##      sold.fctr.predict.Interact.High.cor.Y.glm
## 1447                                         N
## 991                                          N
## 1582                                         N
## 834                                          N
## 962                                          N
## 1589                                         N
##      sold.fctr.predict.All.X.bayesglm.prob
## 1447                            0.42148585
## 991                             0.10512853
## 1582                            0.32530444
## 834                             0.19434095
## 962                             0.30871512
## 1589                            0.07684669
##      sold.fctr.predict.All.X.bayesglm sold.fctr.predict.Low.cor.X.glm.prob
## 1447                                N                           0.39474523
## 991                                 N                           0.08233980
## 1582                                N                           0.30227038
## 834                                 N                           0.15101069
## 962                                 N                           0.35520389
## 1589                                N                           0.07338957
##      sold.fctr.predict.Low.cor.X.glm sold.fctr.predict.All.X.glm.prob
## 1447                               N                       0.40643422
## 991                                N                       0.08679229
## 1582                               N                       0.30697032
## 834                                N                       0.16182543
## 962                                N                       0.30484639
## 1589                               N                       0.07398338
##      sold.fctr.predict.All.X.glm sold.fctr.predict.All.Interact.X.glm.prob
## 1447                           N                              2.220446e-16
## 991                            N                              2.220446e-16
## 1582                           N                              2.220446e-16
## 834                            N                              1.000000e+00
## 962                            N                              2.220446e-16
## 1589                           N                              2.220446e-16
##      sold.fctr.predict.All.Interact.X.glm
## 1447                                    N
## 991                                     N
## 1582                                    N
## 834                                     Y
## 962                                     N
## 1589                                    N
##      sold.fctr.predict.Ensemble.glmnet.prob
## 1447                             0.07069188
## 991                              0.07277253
## 1582                             0.07681084
## 834                              0.09375822
## 962                              0.09865429
## 1589                             0.10181302
##      sold.fctr.predict.Ensemble.glmnet
## 1447                                 N
## 991                                  N
## 1582                                 N
## 834                                  N
## 962                                  N
## 1589                                 N
##      sold.fctr.predict.Ensemble.glmnet.accurate
## 1447                                      FALSE
## 991                                       FALSE
## 1582                                      FALSE
## 834                                       FALSE
## 962                                       FALSE
## 1589                                      FALSE
##      sold.fctr.predict.Ensemble.glmnet.error
## 1447                              -0.4293081
## 991                               -0.4272275
## 1582                              -0.4231892
## 834                               -0.4062418
## 962                               -0.4013457
## 1589                              -0.3981870
##      UniqueID sold.fctr sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob
## 991     10991         Y                                             0.002
## 19      10019         Y                                             0.340
## 1107    11107         Y                                             0.530
## 1541    11542         N                                             0.640
## 1470    11471         N                                             0.766
## 296     10296         N                                             0.986
##      sold.fctr.predict.All.Interact.X.no.rnorm.rf
## 991                                             N
## 19                                              N
## 1107                                            N
## 1541                                            Y
## 1470                                            Y
## 296                                             Y
##      sold.fctr.predict.All.X.no.rnorm.rf.prob
## 991                                     0.016
## 19                                      0.330
## 1107                                    0.420
## 1541                                    0.488
## 1470                                    0.782
## 296                                     0.988
##      sold.fctr.predict.All.X.no.rnorm.rf
## 991                                    N
## 19                                     N
## 1107                                   N
## 1541                                   N
## 1470                                   Y
## 296                                    Y
##      sold.fctr.predict.Max.cor.Y.rpart.prob
## 991                               0.2115028
## 19                                0.2115028
## 1107                              0.2115028
## 1541                              0.2115028
## 1470                              0.9228571
## 296                               0.9228571
##      sold.fctr.predict.Max.cor.Y.rpart
## 991                                  N
## 19                                   N
## 1107                                 N
## 1541                                 N
## 1470                                 Y
## 296                                  Y
##      sold.fctr.predict.All.X.no.rnorm.rpart.prob
## 991                                    0.2115028
## 19                                     0.2115028
## 1107                                   0.2115028
## 1541                                   0.2115028
## 1470                                   0.9228571
## 296                                    0.9228571
##      sold.fctr.predict.All.X.no.rnorm.rpart
## 991                                       N
## 19                                        N
## 1107                                      N
## 1541                                      N
## 1470                                      Y
## 296                                       Y
##      sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob
## 991                                             0.2115028
## 19                                              0.2115028
## 1107                                            0.2115028
## 1541                                            0.2115028
## 1470                                            0.9228571
## 296                                             0.9228571
##      sold.fctr.predict.All.Interact.X.no.rnorm.rpart
## 991                                                N
## 19                                                 N
## 1107                                               N
## 1541                                               N
## 1470                                               Y
## 296                                                Y
##      sold.fctr.predict.All.Interact.X.glmnet.prob
## 991                                     0.2592719
## 19                                      0.2758950
## 1107                                    0.2791061
## 1541                                    0.2525105
## 1470                                    0.9261020
## 296                                     0.7328073
##      sold.fctr.predict.All.Interact.X.glmnet
## 991                                        N
## 19                                         N
## 1107                                       N
## 1541                                       N
## 1470                                       Y
## 296                                        Y
##      sold.fctr.predict.Max.cor.Y.glm.prob sold.fctr.predict.Max.cor.Y.glm
## 991                             0.1375672                               N
## 19                              0.1842910                               N
## 1107                            0.2577252                               N
## 1541                            0.1211751                               N
## 1470                            0.9396433                               Y
## 296                             0.8073492                               Y
##      sold.fctr.predict.All.X.glmnet.prob sold.fctr.predict.All.X.glmnet
## 991                            0.2403315                              N
## 19                             0.2725672                              N
## 1107                           0.2921147                              N
## 1541                           0.2276382                              N
## 1470                           0.8743624                              Y
## 296                            0.7269362                              Y
##      sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob
## 991                                        0.04191617
## 19                                         0.53333333
## 1107                                       0.13333333
## 1541                                       0.04191617
## 1470                                       0.94171779
## 296                                        0.94171779
##      sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart
## 991                                            N
## 19                                             Y
## 1107                                           N
## 1541                                           N
## 1470                                           Y
## 296                                            Y
##      sold.fctr.predict.All.Interact.X.bayesglm.prob
## 991                                       0.1584976
## 19                                        0.2432798
## 1107                                      0.1954608
## 1541                                      0.3906655
## 1470                                      0.9995656
## 296                                       0.9023783
##      sold.fctr.predict.All.Interact.X.bayesglm
## 991                                          N
## 19                                           N
## 1107                                         N
## 1541                                         N
## 1470                                         Y
## 296                                          Y
##      sold.fctr.predict.Interact.High.cor.Y.glm.prob
## 991                                       0.1389766
## 19                                        0.1851791
## 1107                                      0.2574524
## 1541                                      0.1227075
## 1470                                      0.9530526
## 296                                       0.8319335
##      sold.fctr.predict.Interact.High.cor.Y.glm
## 991                                          N
## 19                                           N
## 1107                                         N
## 1541                                         N
## 1470                                         Y
## 296                                          Y
##      sold.fctr.predict.All.X.bayesglm.prob
## 991                              0.1051285
## 19                               0.1878507
## 1107                             0.3366319
## 1541                             0.2572607
## 1470                             0.9942701
## 296                              0.8511794
##      sold.fctr.predict.All.X.bayesglm sold.fctr.predict.Low.cor.X.glm.prob
## 991                                 N                            0.0823398
## 19                                  N                            0.1796292
## 1107                                N                            0.4095530
## 1541                                N                            0.2354375
## 1470                                Y                            0.9911401
## 296                                 Y                            0.8569773
##      sold.fctr.predict.Low.cor.X.glm sold.fctr.predict.All.X.glm.prob
## 991                                N                       0.08679229
## 19                                 N                       0.17948604
## 1107                               N                       0.40596515
## 1541                               N                       0.25204312
## 1470                               Y                       0.99569308
## 296                                Y                       0.86092054
##      sold.fctr.predict.All.X.glm sold.fctr.predict.All.Interact.X.glm.prob
## 991                            N                              2.220446e-16
## 19                             N                              1.000000e+00
## 1107                           N                              2.220446e-16
## 1541                           N                              1.000000e+00
## 1470                           Y                              1.000000e+00
## 296                            Y                              1.000000e+00
##      sold.fctr.predict.All.Interact.X.glm
## 991                                     N
## 19                                      Y
## 1107                                    N
## 1541                                    Y
## 1470                                    Y
## 296                                     Y
##      sold.fctr.predict.Ensemble.glmnet.prob
## 991                              0.07277253
## 19                               0.29712381
## 1107                             0.43653608
## 1541                             0.54588029
## 1470                             0.80790361
## 296                              0.92156888
##      sold.fctr.predict.Ensemble.glmnet
## 991                                  N
## 19                                   N
## 1107                                 N
## 1541                                 Y
## 1470                                 Y
## 296                                  Y
##      sold.fctr.predict.Ensemble.glmnet.accurate
## 991                                       FALSE
## 19                                        FALSE
## 1107                                      FALSE
## 1541                                      FALSE
## 1470                                      FALSE
## 296                                       FALSE
##      sold.fctr.predict.Ensemble.glmnet.error
## 991                              -0.42722747
## 19                               -0.20287619
## 1107                             -0.06346392
## 1541                              0.04588029
## 1470                              0.30790361
## 296                               0.42156888
##     UniqueID sold.fctr sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob
## 472    10472         N                                             0.986
## 841    10841         N                                             0.970
## 955    10955         N                                             0.992
## 199    10199         N                                             0.970
## 296    10296         N                                             0.986
## 416    10416         N                                             1.000
##     sold.fctr.predict.All.Interact.X.no.rnorm.rf
## 472                                            Y
## 841                                            Y
## 955                                            Y
## 199                                            Y
## 296                                            Y
## 416                                            Y
##     sold.fctr.predict.All.X.no.rnorm.rf.prob
## 472                                    0.932
## 841                                    0.978
## 955                                    0.960
## 199                                    0.984
## 296                                    0.988
## 416                                    0.994
##     sold.fctr.predict.All.X.no.rnorm.rf
## 472                                   Y
## 841                                   Y
## 955                                   Y
## 199                                   Y
## 296                                   Y
## 416                                   Y
##     sold.fctr.predict.Max.cor.Y.rpart.prob
## 472                              0.9228571
## 841                              0.9228571
## 955                              0.9228571
## 199                              0.9228571
## 296                              0.9228571
## 416                              0.9228571
##     sold.fctr.predict.Max.cor.Y.rpart
## 472                                 Y
## 841                                 Y
## 955                                 Y
## 199                                 Y
## 296                                 Y
## 416                                 Y
##     sold.fctr.predict.All.X.no.rnorm.rpart.prob
## 472                                   0.9228571
## 841                                   0.9228571
## 955                                   0.9228571
## 199                                   0.9228571
## 296                                   0.9228571
## 416                                   0.9228571
##     sold.fctr.predict.All.X.no.rnorm.rpart
## 472                                      Y
## 841                                      Y
## 955                                      Y
## 199                                      Y
## 296                                      Y
## 416                                      Y
##     sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob
## 472                                            0.9228571
## 841                                            0.9228571
## 955                                            0.9228571
## 199                                            0.9228571
## 296                                            0.9228571
## 416                                            0.9228571
##     sold.fctr.predict.All.Interact.X.no.rnorm.rpart
## 472                                               Y
## 841                                               Y
## 955                                               Y
## 199                                               Y
## 296                                               Y
## 416                                               Y
##     sold.fctr.predict.All.Interact.X.glmnet.prob
## 472                                    0.6889045
## 841                                    0.7587202
## 955                                    0.7890379
## 199                                    0.8058413
## 296                                    0.7328073
## 416                                    0.7922638
##     sold.fctr.predict.All.Interact.X.glmnet
## 472                                       Y
## 841                                       Y
## 955                                       Y
## 199                                       Y
## 296                                       Y
## 416                                       Y
##     sold.fctr.predict.Max.cor.Y.glm.prob sold.fctr.predict.Max.cor.Y.glm
## 472                            0.7556156                               Y
## 841                            0.8296527                               Y
## 955                            0.8525572                               Y
## 199                            0.8704164                               Y
## 296                            0.8073492                               Y
## 416                            0.8446071                               Y
##     sold.fctr.predict.All.X.glmnet.prob sold.fctr.predict.All.X.glmnet
## 472                           0.7229457                              Y
## 841                           0.7392313                              Y
## 955                           0.7568769                              Y
## 199                           0.7608577                              Y
## 296                           0.7269362                              Y
## 416                           0.7744068                              Y
##     sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob
## 472                                        0.8235294
## 841                                        0.9417178
## 955                                        0.9417178
## 199                                        0.9417178
## 296                                        0.9417178
## 416                                        0.9417178
##     sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart
## 472                                           Y
## 841                                           Y
## 955                                           Y
## 199                                           Y
## 296                                           Y
## 416                                           Y
##     sold.fctr.predict.All.Interact.X.bayesglm.prob
## 472                                      0.8486180
## 841                                      0.9777102
## 955                                      0.9458590
## 199                                      0.9472238
## 296                                      0.9023783
## 416                                      0.9751765
##     sold.fctr.predict.All.Interact.X.bayesglm
## 472                                         Y
## 841                                         Y
## 955                                         Y
## 199                                         Y
## 296                                         Y
## 416                                         Y
##     sold.fctr.predict.Interact.High.cor.Y.glm.prob
## 472                                      0.7859171
## 841                                      0.7496888
## 955                                      0.8846367
## 199                                      0.8631023
## 296                                      0.8319335
## 416                                      0.8647130
##     sold.fctr.predict.Interact.High.cor.Y.glm
## 472                                         Y
## 841                                         Y
## 955                                         Y
## 199                                         Y
## 296                                         Y
## 416                                         Y
##     sold.fctr.predict.All.X.bayesglm.prob sold.fctr.predict.All.X.bayesglm
## 472                             0.8540818                                Y
## 841                             0.8985471                                Y
## 955                             0.8917715                                Y
## 199                             0.9209272                                Y
## 296                             0.8511794                                Y
## 416                             0.9187159                                Y
##     sold.fctr.predict.Low.cor.X.glm.prob sold.fctr.predict.Low.cor.X.glm
## 472                            0.8534160                               Y
## 841                            0.9200228                               Y
## 955                            0.9030208                               Y
## 199                            0.9527908                               Y
## 296                            0.8569773                               Y
## 416                            0.9193082                               Y
##     sold.fctr.predict.All.X.glm.prob sold.fctr.predict.All.X.glm
## 472                        0.8624339                           Y
## 841                        0.8355489                           Y
## 955                        0.9015627                           Y
## 199                        0.9260455                           Y
## 296                        0.8609205                           Y
## 416                        0.9250955                           Y
##     sold.fctr.predict.All.Interact.X.glm.prob
## 472                              1.000000e+00
## 841                              1.000000e+00
## 955                              1.000000e+00
## 199                              2.220446e-16
## 296                              1.000000e+00
## 416                              1.000000e+00
##     sold.fctr.predict.All.Interact.X.glm
## 472                                    Y
## 841                                    Y
## 955                                    Y
## 199                                    N
## 296                                    Y
## 416                                    Y
##     sold.fctr.predict.Ensemble.glmnet.prob
## 472                              0.9088360
## 841                              0.9175190
## 955                              0.9179147
## 199                              0.9183720
## 296                              0.9215689
## 416                              0.9255775
##     sold.fctr.predict.Ensemble.glmnet
## 472                                 Y
## 841                                 Y
## 955                                 Y
## 199                                 Y
## 296                                 Y
## 416                                 Y
##     sold.fctr.predict.Ensemble.glmnet.accurate
## 472                                      FALSE
## 841                                      FALSE
## 955                                      FALSE
## 199                                      FALSE
## 296                                      FALSE
## 416                                      FALSE
##     sold.fctr.predict.Ensemble.glmnet.error
## 472                               0.4088360
## 841                               0.4175190
## 955                               0.4179147
## 199                               0.4183720
## 296                               0.4215689
## 416                               0.4255775

# Export the OOB observations: the id column plus every column whose name 
#   contains glb_rsp_var. 
#   File name = <glb_out_pfx><glb_sel_mdl_id> with "." replaced by "_", 
#   followed by "_OOBobs.csv"
write.csv(
    glb_OOBobs_df[, c(glb_id_var, 
        names(glb_OOBobs_df)[grepl(glb_rsp_var, names(glb_OOBobs_df), fixed=TRUE)])], 
    file=paste0(gsub(".", "_", paste0(glb_out_pfx, glb_sel_mdl_id), fixed=TRUE), 
                "_OOBobs.csv"), 
    row.names=FALSE)

# Log the start of the next "fit.models" minor step in the chunk timing table
#   (major.inc=FALSE keeps step_major and advances step_minor, per the output below)
glb_chunks_df <- myadd_chunk(glb_chunks_df, "fit.models", major.inc=FALSE)
##         label step_major step_minor     bgn     end elapsed
## 12 fit.models          7          2 236.655 268.982  32.327
## 13 fit.models          7          3 268.982      NA      NA
# if (sum(is.na(glb_allobs_df$D.P.http)) > 0)
#         stop("fit.models_3: Why is this happening ?")

#stop(here"); glb_to_sav()
sync_glb_obs_df <- function() {
    # Copies columns that exist only in glb_fitobs_df / glb_OOBobs_df back into
    #   glb_trnobs_df and glb_allobs_df, writing to the rows whose .lcn is 
    #   "Fit" / "OOB" respectively. Takes no arguments; reads the glb_*obs_df 
    #   globals and updates glb_trnobs_df & glb_allobs_df via super-assignment.
    # OOB columns are copied into glb_trnobs_df only when every value of
    #   glb_rsp_var in glb_newobs_df is NA.
    # Merge or cbind ?

    # Fit-only columns -> training obs
    trn_is_fit <- glb_trnobs_df$.lcn == "Fit"
    for (fit_col in setdiff(names(glb_fitobs_df), names(glb_trnobs_df))) {
        glb_trnobs_df[trn_is_fit, fit_col] <<- glb_fitobs_df[, fit_col]
    }

    # Fit-only columns -> all obs
    all_is_fit <- glb_allobs_df$.lcn == "Fit"
    for (fit_col in setdiff(names(glb_fitobs_df), names(glb_allobs_df))) {
        glb_allobs_df[all_is_fit, fit_col] <<- glb_fitobs_df[, fit_col]
    }

    # OOB-only columns -> training obs (only if new obs carry no response values)
    #   names(glb_trnobs_df) is re-read here, i.e. after the Fit columns were added
    newobs_rsp_all_na <- all(is.na(glb_newobs_df[, glb_rsp_var]))
    if (newobs_rsp_all_na) {
        trn_is_OOB <- glb_trnobs_df$.lcn == "OOB"
        for (OOB_col in setdiff(names(glb_OOBobs_df), names(glb_trnobs_df))) {
            glb_trnobs_df[trn_is_OOB, OOB_col] <<- glb_OOBobs_df[, OOB_col]
        }
    }

    # OOB-only columns -> all obs
    all_is_OOB <- glb_allobs_df$.lcn == "OOB"
    for (OOB_col in setdiff(names(glb_OOBobs_df), names(glb_allobs_df))) {
        glb_allobs_df[all_is_OOB, OOB_col] <<- glb_OOBobs_df[, OOB_col]
    }
}
# Push the Fit / OOB-only columns back into glb_trnobs_df & glb_allobs_df
sync_glb_obs_df()

# Sanity check: list any glb_newobs_df columns that glb_allobs_df lacks
print(setdiff(names(glb_newobs_df), names(glb_allobs_df)))
## character(0)
# Checkpoint the selected-model state to <glb_out_pfx>selmdl_dsk.RData, 
#   only when glb_save_envir is set
if (glb_save_envir) {
    save(list=c("glb_feats_df", 
                "glb_allobs_df", #glb_trnobs_df, glb_fitobs_df, glb_OOBobs_df, glb_newobs_df,
                "glb_models_df", "dsp_models_df", "glb_models_lst", 
                "glb_sel_mdl", "glb_sel_mdl_id",
                "glb_model_type"),
         file=paste0(glb_out_pfx, "selmdl_dsk.RData"))
}
#load(paste0(glb_out_pfx, "selmdl_dsk.RData"))

# Drop the last model-fit return value; it is not needed past this point
rm(ret_lst)
# Append "model.selected" to glb_analytics_avl_objs (the assignment is embedded
#   in the argument) and replay the analytics petri-net with the updated vector
replay.petrisim(pn=glb_analytics_pn, 
    replay.trans=(glb_analytics_avl_objs <- c(glb_analytics_avl_objs, 
        "model.selected")), flip_coord=TRUE)
## time trans    "bgn " "fit.data.training.all " "predict.data.new " "end " 
## 0.0000   multiple enabled transitions:  data.training.all data.new model.selected    firing:  data.training.all 
## 1.0000    1   2 1 0 0 
## 1.0000   multiple enabled transitions:  data.training.all data.new model.selected model.final data.training.all.prediction   firing:  data.new 
## 2.0000    2   1 1 1 0 
## 2.0000   multiple enabled transitions:  data.training.all data.new model.selected model.final data.training.all.prediction data.new.prediction   firing:  model.selected 
## 3.0000    3   0 2 1 0

# Close out "fit.models" and log the start of the "fit.data.training" major step
#   (major.inc=TRUE advances step_major and resets step_minor, per the output below)
glb_chunks_df <- myadd_chunk(glb_chunks_df, "fit.data.training", major.inc=TRUE)
##                label step_major step_minor     bgn     end elapsed
## 13        fit.models          7          3 268.982 276.637   7.655
## 14 fit.data.training          8          0 276.638      NA      NA

Step 8.0: fit data training

#load(paste0(glb_inp_pfx, "dsk.RData"))

# Fit the "Final" model on the full training set (glb_trnobs_df).
#   - If glb_fin_mdl_id is set and already present in glb_models_lst, the
#       selected model is reused as-is.
#   - Otherwise the selected model's method & features are refit on 
#       glb_trnobs_df; for an Ensemble selection the relevant component models 
#       are refit first and their predictions become the Final model's features.
#   Sets glb_fin_mdl (and, in the refit branch, glb_fin_mdl_id).
if (!is.null(glb_fin_mdl_id) && (glb_fin_mdl_id %in% names(glb_models_lst))) {
    # NOTE(review): the condition only checks that glb_fin_mdl_id is a fitted
    #   model; it is presumably expected to equal glb_sel_mdl_id -- confirm
    warning("Final model same as user selected model")
    glb_fin_mdl <- glb_sel_mdl
} else {    
    # trim() is called in both the Ensemble loop and the feats lookup below, so 
    #   gdata (which exports trim) has to be attached before those calls; 
    #   previously it was attached only just before the "Final" myfit_mdl call
    require(gdata)

#stop(here"); glb_to_sav()    
    if (grepl("Ensemble", glb_sel_mdl_id)) {
        # Find which models are relevant
        #   i.e. ensemble inputs whose variable importance (Overall) exceeds 5
        mdlimp_df <- subset(varImp(glb_sel_mdl)$importance, Overall > 5)
        # Fit selected models on glb_trnobs_df
        #   Component model ids are recovered from the importance row names by
        #   stripping the glb_rsp_var_out prefix and the ".prob" suffix
        for (mdl_id in gsub(".prob", "", 
                            gsub(glb_rsp_var_out, "", row.names(mdlimp_df), fixed=TRUE),
                            fixed=TRUE)) {
            # Last "."-separated component is the method; "Train" is inserted 
            #   before it to name the refit model, e.g. <id>.Train.<method>
            mdl_id_components <- unlist(strsplit(mdl_id, "[.]"))
            ret_lst <- 
                myfit_mdl(model_id=paste0(c(head(mdl_id_components, -1), "Train"),
                                          collapse="."), 
                        model_method=tail(mdl_id_components, 1),
                        indep_vars_vctr=trim(unlist(strsplit(
                        glb_models_df[glb_models_df$model_id == mdl_id, "feats"], "[,]"))),
                        model_type=glb_model_type,
                        rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out, 
                        fit_df=glb_trnobs_df, OOB_df=NULL,
                        n_cv_folds=glb_n_cv_folds, tune_models_df=glb_tune_models_df)
            # Add the refit model's predictions to the training & new obs; the
            #   threshold comes from the original (OOB-evaluated) model mdl_id.
            #   tail(glb_models_df$model_id, 1) is presumably the model just fit
            #   by myfit_mdl -- assumes it appends to glb_models_df
            glb_trnobs_df <- glb_get_predictions(df=glb_trnobs_df,
                                                mdl_id=tail(glb_models_df$model_id, 1), 
                                                rsp_var_out=glb_rsp_var_out,
                                                prob_threshold_def=subset(glb_models_df,
                                                model_id == mdl_id)$opt.prob.threshold.OOB)
            glb_newobs_df <- glb_get_predictions(df=glb_newobs_df,
                                                mdl_id=tail(glb_models_df$model_id, 1), 
                                                rsp_var_out=glb_rsp_var_out,
                                                prob_threshold_def=subset(glb_models_df,
                                                model_id == mdl_id)$opt.prob.threshold.OOB)
        }    
    }
    
    # "Final" model
    if ((model_method <- glb_sel_mdl$method) == "custom")
        # get actual method from the model_id
        model_method <- tail(unlist(strsplit(glb_sel_mdl_id, "[.]")), 1)
    
    if (grepl("Ensemble", glb_sel_mdl_id)) {
        # Find which models are relevant
        mdlimp_df <- subset(varImp(glb_sel_mdl)$importance, Overall > 5)
        # Features of the Final ensemble are the prediction columns of the 
        #   ".Train." refits: insert "Train" before the method component
        if (glb_is_classification && glb_is_binomial)
            indep_vars_vctr <- gsub("(.*)\\.(.*)\\.prob", "\\1\\.Train\\.\\2\\.prob",
                                    row.names(mdlimp_df)) else
            indep_vars_vctr <- gsub("(.*)\\.(.*)", "\\1\\.Train\\.\\2",
                                    row.names(mdlimp_df))
    } else indep_vars_vctr <- 
                trim(unlist(strsplit(glb_models_df[glb_models_df$model_id == glb_sel_mdl_id
                                                   , "feats"], "[,]")))
        
    # Discontinuing use of tune_finmdl_df; 
    #   since final model needs to be cved on glb_trnobs_df
    #   (still built here, pinning each tuning parameter to the selected model's
    #    bestTune value, but no longer passed to myfit_mdl below)
    tune_finmdl_df <- NULL
    if (nrow(glb_sel_mdl$bestTune) > 0) {
        for (param in names(glb_sel_mdl$bestTune)) {
            #print(sprintf("param: %s", param))
            if (glb_sel_mdl$bestTune[1, param] != "none")
                tune_finmdl_df <- rbind(tune_finmdl_df, 
                    data.frame(parameter=param, 
                               min=glb_sel_mdl$bestTune[1, param], 
                               max=glb_sel_mdl$bestTune[1, param], 
                               by=1)) # by val does not matter
        }
    } 
    
    # Sync with parameters in mydsutils.R
    ret_lst <- myfit_mdl(model_id="Final", model_method=model_method,
                        indep_vars_vctr=indep_vars_vctr, 
                         model_type=glb_model_type,
                            rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out, 
                            fit_df=glb_trnobs_df, OOB_df=NULL,
                            n_cv_folds=glb_n_cv_folds, 
                            tune_models_df=glb_tune_models_df, #tune_finmdl_df,
                         # Automate from here
                         #  Issues if glb_sel_mdl$method == "rf" b/c trainControl is "oob"; not "cv"
                            model_loss_mtrx=glb_model_metric_terms,
                            model_summaryFunction=glb_sel_mdl$control$summaryFunction,
                            model_metric=glb_sel_mdl$metric,
                            model_metric_maximize=glb_sel_mdl$maximize)
    # The Final model is taken to be the last entry of glb_models_lst / 
    #   glb_models_df -- assumes myfit_mdl appends the new fit to both
    glb_fin_mdl <- glb_models_lst[[length(glb_models_lst)]] 
    glb_fin_mdl_id <- glb_models_df[length(glb_models_lst), "model_id"]
}
## [1] "fitting model: All.Interact.X.no.rnorm.Train.rf"
## [1] "    indep_vars: D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr"
## Aggregating results
## Selecting tuning parameters
## Fitting mtry = 129 on full training set

##                 Length Class      Mode     
## call               4   -none-     call     
## type               1   -none-     character
## predicted       1859   factor     numeric  
## err.rate        1500   -none-     numeric  
## confusion          6   -none-     numeric  
## votes           3718   matrix     numeric  
## oob.times       1859   -none-     numeric  
## classes            2   -none-     character
## importance       257   -none-     numeric  
## importanceSD       0   -none-     NULL     
## localImportance    0   -none-     NULL     
## proximity          0   -none-     NULL     
## ntree              1   -none-     numeric  
## mtry               1   -none-     numeric  
## forest            14   -none-     list     
## y               1859   factor     numeric  
## test               0   -none-     NULL     
## inbag              0   -none-     NULL     
## xNames           257   -none-     character
## problemType        1   -none-     character
## tuneValue          1   data.frame list     
## obsLevels          2   -none-     character
## [1] "    calling mypredict_mdl for fit:"

##    threshold   f.score
## 1        0.0 0.6325855
## 2        0.1 0.8557214
## 3        0.2 0.9513274
## 4        0.3 0.9834191
## 5        0.4 1.0000000
## 6        0.5 1.0000000
## 7        0.6 0.9982528
## 8        0.7 0.9644792
## 9        0.8 0.8974359
## 10       0.9 0.8227242
## 11       1.0 0.3284742
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.N
## 1         N                                                  999
## 2         Y                                                   NA
##   sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.Y
## 1                                                   NA
## 2                                                  860
##          Prediction
## Reference   N   Y
##         N 999   0
##         Y   0 860
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      1.0000000      1.0000000      0.9980176      1.0000000      0.5373857 
## AccuracyPValue  McnemarPValue 
##      0.0000000            NaN 
##                           model_id model_method
## 1 All.Interact.X.no.rnorm.Train.rf           rf
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           feats
## 1 D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
##   max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1               3                     83.205                30.486
##   max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1           1                    0.5               1         0.838628
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1             0.9980176                     1      0.673513
##   max.AccuracySD.fit max.KappaSD.fit
## 1         0.01046481      0.02054501
## Warning in glb_get_predictions(df = glb_trnobs_df, mdl_id =
## tail(glb_models_df$model_id, : Using default probability threshold: 0.6
## Warning in glb_get_predictions(df = glb_newobs_df, mdl_id =
## tail(glb_models_df$model_id, : Using default probability threshold: 0.6

## [1] "fitting model: All.X.no.rnorm.Train.rf"
## [1] "    indep_vars: biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr"
## Aggregating results
## Selecting tuning parameters
## Fitting mtry = 77 on full training set

##                 Length Class      Mode     
## call               4   -none-     call     
## type               1   -none-     character
## predicted       1859   factor     numeric  
## err.rate        1500   -none-     numeric  
## confusion          6   -none-     numeric  
## votes           3718   matrix     numeric  
## oob.times       1859   -none-     numeric  
## classes            2   -none-     character
## importance       153   -none-     numeric  
## importanceSD       0   -none-     NULL     
## localImportance    0   -none-     NULL     
## proximity          0   -none-     NULL     
## ntree              1   -none-     numeric  
## mtry               1   -none-     numeric  
## forest            14   -none-     list     
## y               1859   factor     numeric  
## test               0   -none-     NULL     
## inbag              0   -none-     NULL     
## xNames           153   -none-     character
## problemType        1   -none-     character
## tuneValue          1   data.frame list     
## obsLevels          2   -none-     character
## [1] "    calling mypredict_mdl for fit:"

##    threshold   f.score
## 1        0.0 0.6325855
## 2        0.1 0.8535980
## 3        0.2 0.9534368
## 4        0.3 0.9839817
## 5        0.4 0.9982589
## 6        0.5 1.0000000
## 7        0.6 0.9994183
## 8        0.7 0.9644792
## 9        0.8 0.8988476
## 10       0.9 0.8283379
## 11       1.0 0.2340862
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.All.X.no.rnorm.Train.rf.N
## 1         N                                         999
## 2         Y                                          NA
##   sold.fctr.predict.All.X.no.rnorm.Train.rf.Y
## 1                                          NA
## 2                                         860
##          Prediction
## Reference   N   Y
##         N 999   0
##         Y   0 860
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      1.0000000      1.0000000      0.9980176      1.0000000      0.5373857 
## AccuracyPValue  McnemarPValue 
##      0.0000000            NaN 
##                  model_id model_method
## 1 All.X.no.rnorm.Train.rf           rf
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        feats
## 1 biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
##   max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1               3                     45.755                16.036
##   max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1           1                    0.5               1        0.8413179
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1             0.9980176                     1     0.6787273
##   max.AccuracySD.fit max.KappaSD.fit
## 1        0.008745356      0.01827901
## Warning in glb_get_predictions(df = glb_trnobs_df, mdl_id =
## tail(glb_models_df$model_id, : Using default probability threshold: 0.5
## Warning in glb_get_predictions(df = glb_newobs_df, mdl_id =
## tail(glb_models_df$model_id, : Using default probability threshold: 0.5

## [1] "fitting model: Max.cor.Y.cv.0.cp.0.Train.rpart"
## [1] "    indep_vars: biddable, startprice.diff"
## Aggregating results
## Selecting tuning parameters
## Fitting cp = 0.00174 on full training set
## Warning in myfit_mdl(model_id = paste0(c(head(mdl_id_components, -1),
## "Train"), : model's bestTune found at an extreme of tuneGrid for parameter:
## cp

## Call:
## rpart(formula = .outcome ~ ., control = list(minsplit = 20, minbucket = 7, 
##     cp = 0, maxcompete = 4, maxsurrogate = 5, usesurrogate = 2, 
##     surrogatestyle = 0, maxdepth = 30, xval = 0))
##   n= 1859 
## 
##            CP nsplit rel error
## 1 0.515116279      0 1.0000000
## 2 0.147674419      1 0.4848837
## 3 0.001744186      2 0.3372093
## 
## Variable importance
##        biddable startprice.diff 
##              61              39 
## 
## Node number 1: 1859 observations,    complexity param=0.5151163
##   predicted class=N  expected loss=0.4626143  P(node) =1
##     class counts:   999   860
##    probabilities: 0.537 0.463 
##   left son=2 (1022 obs) right son=3 (837 obs)
##   Primary splits:
##       biddable        < 0.5      to the left,  improve=277.7532, (0 missing)
##       startprice.diff < 41.5325  to the right, improve=181.7181, (0 missing)
##   Surrogate splits:
##       startprice.diff < 250.1071 to the left,  agree=0.557, adj=0.016, (0 split)
## 
## Node number 2: 1022 observations
##   predicted class=N  expected loss=0.2152642  P(node) =0.5497579
##     class counts:   802   220
##    probabilities: 0.785 0.215 
## 
## Node number 3: 837 observations,    complexity param=0.1476744
##   predicted class=Y  expected loss=0.2353644  P(node) =0.4502421
##     class counts:   197   640
##    probabilities: 0.235 0.765 
##   left son=6 (167 obs) right son=7 (670 obs)
##   Primary splits:
##       startprice.diff < 59.22341 to the right, improve=173.5195, (0 missing)
## 
## Node number 6: 167 observations
##   predicted class=N  expected loss=0.1197605  P(node) =0.08983324
##     class counts:   147    20
##    probabilities: 0.880 0.120 
## 
## Node number 7: 670 observations
##   predicted class=Y  expected loss=0.07462687  P(node) =0.3604088
##     class counts:    50   620
##    probabilities: 0.075 0.925 
## 
## n= 1859 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 1859 860 N (0.53738569 0.46261431)  
##   2) biddable< 0.5 1022 220 N (0.78473581 0.21526419) *
##   3) biddable>=0.5 837 197 Y (0.23536440 0.76463560)  
##     6) startprice.diff>=59.22341 167  20 N (0.88023952 0.11976048) *
##     7) startprice.diff< 59.22341 670  50 Y (0.07462687 0.92537313) *
## [1] "    calling mypredict_mdl for fit:"

##    threshold   f.score
## 1        0.0 0.6325855
## 2        0.1 0.6325855
## 3        0.2 0.6583072
## 4        0.3 0.8104575
## 5        0.4 0.8104575
## 6        0.5 0.8104575
## 7        0.6 0.8104575
## 8        0.7 0.8104575
## 9        0.8 0.8104575
## 10       0.9 0.8104575
## 11       1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.9000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.N
## 1         N                                                 949
## 2         Y                                                 240
##   sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.Y
## 1                                                  50
## 2                                                 620
##          Prediction
## Reference   N   Y
##         N 949  50
##         Y 240 620
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.440022e-01   6.813526e-01   8.266980e-01   8.602130e-01   5.373857e-01 
## AccuracyPValue  McnemarPValue 
##  6.399625e-173   1.276209e-28 
##                          model_id model_method                     feats
## 1 Max.cor.Y.cv.0.cp.0.Train.rpart        rpart biddable, startprice.diff
##   max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1               3                      1.082                 0.015
##   max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1   0.8449263                    0.9       0.8104575        0.8267914
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1              0.826698              0.860213     0.6496665
##   max.AccuracySD.fit max.KappaSD.fit
## 1         0.01674491      0.03182114
## Warning in glb_get_predictions(df = glb_trnobs_df, mdl_id =
## tail(glb_models_df$model_id, : Using default probability threshold: 0.3
## Warning in glb_get_predictions(df = glb_newobs_df, mdl_id =
## tail(glb_models_df$model_id, : Using default probability threshold: 0.3

## [1] "fitting model: Final.glmnet"
## [1] "    indep_vars: sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob, sold.fctr.predict.All.X.no.rnorm.Train.rf.prob, sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob"
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.1, lambda = 0.0972 on full training set
## Warning in myfit_mdl(model_id = "Final", model_method = model_method,
## indep_vars_vctr = indep_vars_vctr, : model's bestTune found at an extreme
## of tuneGrid for parameter: alpha
## Warning in myfit_mdl(model_id = "Final", model_method = model_method,
## indep_vars_vctr = indep_vars_vctr, : model's bestTune found at an extreme
## of tuneGrid for parameter: lambda

##             Length Class      Mode     
## a0          100    -none-     numeric  
## beta        300    dgCMatrix  S4       
## df          100    -none-     numeric  
## dim           2    -none-     numeric  
## lambda      100    -none-     numeric  
## dev.ratio   100    -none-     numeric  
## nulldev       1    -none-     numeric  
## npasses       1    -none-     numeric  
## jerr          1    -none-     numeric  
## offset        1    -none-     logical  
## classnames    2    -none-     character
## call          5    -none-     call     
## nobs          1    -none-     numeric  
## lambdaOpt     1    -none-     numeric  
## xNames        3    -none-     character
## problemType   1    -none-     character
## tuneValue     2    data.frame list     
## obsLevels     2    -none-     character
## [1] "min lambda > lambdaOpt:"
##                                             (Intercept) 
##                                              -2.7019114 
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob 
##                                               2.3329814 
##          sold.fctr.predict.All.X.no.rnorm.Train.rf.prob 
##                                               2.3451046 
##  sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob 
##                                               0.7356721 
## [1] "max lambda < lambdaOpt:"
##                                             (Intercept) 
##                                               -7.757483 
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob 
##                                                8.899955 
##          sold.fctr.predict.All.X.no.rnorm.Train.rf.prob 
##                                                9.018704 
##  sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob 
##                                               -2.293058 
## character(0)
## character(0)
## [1] "    calling mypredict_mdl for fit:"

##    threshold   f.score
## 1        0.0 0.6325855
## 2        0.1 0.8194378
## 3        0.2 0.9630459
## 4        0.3 0.9783845
## 5        0.4 0.9907834
## 6        0.5 1.0000000
## 7        0.6 0.9953271
## 8        0.7 0.9486553
## 9        0.8 0.8780170
## 10       0.9 0.8088643
## 11       1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.fit"
##   sold.fctr sold.fctr.predict.Final.glmnet.N
## 1         N                              999
## 2         Y                               NA
##   sold.fctr.predict.Final.glmnet.Y
## 1                               NA
## 2                              860
##          Prediction
## Reference   N   Y
##         N 999   0
##         Y   0 860
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      1.0000000      1.0000000      0.9980176      1.0000000      0.5373857 
## AccuracyPValue  McnemarPValue 
##      0.0000000            NaN
## Warning in mypredict_mdl(mdl, df = fit_df, rsp_var, rsp_var_out,
## model_id_method, : Expecting 1 metric: Accuracy; recd: Accuracy, Kappa;
## retaining Accuracy only

##       model_id model_method
## 1 Final.glmnet       glmnet
##                                                                                                                                                             feats
## 1 sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob, sold.fctr.predict.All.X.no.rnorm.Train.rf.prob, sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob
##   max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1               9                      1.457                 0.041
##   max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1           1                    0.5               1                1
##   max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1             0.9980176                     1             1
##   max.AccuracySD.fit max.KappaSD.fit
## 1                  0               0
# Remove ret_lst from the workspace; it is not referenced again in this chunk.
rm(ret_lst)
# Log a new minor step under "fit.data.training" (major.inc=FALSE keeps the same
# major step number); the printed chunk table shows the previous row's end/elapsed
# being filled in and a new open row being appended.
glb_chunks_df <- myadd_chunk(glb_chunks_df, "fit.data.training", major.inc=FALSE)
##                label step_major step_minor     bgn     end elapsed
## 14 fit.data.training          8          0 276.638 419.196 142.558
## 15 fit.data.training          8          1 419.196      NA      NA
#```

#```{r fit.data.training_1, cache=FALSE}
#stop("here"); glb_to_sav()
# Score the training observations with the final model (glb_fin_mdl_id).
# For binomial classification the default probability cutoff handed to
# glb_get_predictions is the selected model's "opt.prob.threshold.OOB";
# for every other problem type NULL is passed.
# A scalar if/else is used instead of ifelse(): ifelse(FALSE, x, NULL) stops with
# "replacement has length zero", so the NULL branch could never actually be taken.
# The trailing [1] keeps the length-1 result (NA when no row matches) that
# ifelse() produced for the TRUE branch.
glb_trnobs_df <- glb_get_predictions(df=glb_trnobs_df, mdl_id=glb_fin_mdl_id, 
                                     rsp_var_out=glb_rsp_var_out,
    prob_threshold_def=if (glb_is_classification && glb_is_binomial) {
        glb_models_df[glb_models_df$model_id == glb_sel_mdl_id, "opt.prob.threshold.OOB"][1]
    } else NULL)
## Warning in glb_get_predictions(df = glb_trnobs_df, mdl_id =
## glb_fin_mdl_id, : Using default probability threshold: 0.5
# Recompute the feature-importance table from the final model, then duplicate its
# `importance` column under a column named "<final model id>.importance" so that
# it sits next to the importance columns of earlier models, and show the result.
glb_featsimp_df <- myget_feats_importance(mdl=glb_fin_mdl,
                                          featsimp_df=glb_featsimp_df)
glb_featsimp_df[[paste0(glb_fin_mdl_id, ".importance")]] <-
    glb_featsimp_df[["importance"]]
print(glb_featsimp_df)
##                                                         Ensemble.glmnet.importance
## sold.fctr.predict.All.X.no.rnorm.Train.rf.prob                                  NA
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob                         NA
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob                          NA
## sold.fctr.predict.All.Interact.X.bayesglm.prob                            4.587832
## sold.fctr.predict.All.Interact.X.glm.prob                                 0.000000
## sold.fctr.predict.All.Interact.X.glmnet.prob                              0.000000
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob                        98.138116
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob                      0.000000
## sold.fctr.predict.All.X.bayesglm.prob                                     0.000000
## sold.fctr.predict.All.X.glm.prob                                          0.000000
## sold.fctr.predict.All.X.glmnet.prob                                       0.000000
## sold.fctr.predict.All.X.no.rnorm.rf.prob                                100.000000
## sold.fctr.predict.All.X.no.rnorm.rpart.prob                               0.000000
## sold.fctr.predict.Interact.High.cor.Y.glm.prob                            0.000000
## sold.fctr.predict.Low.cor.X.glm.prob                                      0.000000
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob                          7.031892
## sold.fctr.predict.Max.cor.Y.glm.prob                                      0.000000
## sold.fctr.predict.Max.cor.Y.rpart.prob                                    0.000000
##                                                         importance
## sold.fctr.predict.All.X.no.rnorm.Train.rf.prob           100.00000
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob   99.24746
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob     0.00000
## sold.fctr.predict.All.Interact.X.bayesglm.prob                  NA
## sold.fctr.predict.All.Interact.X.glm.prob                       NA
## sold.fctr.predict.All.Interact.X.glmnet.prob                    NA
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob               NA
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob            NA
## sold.fctr.predict.All.X.bayesglm.prob                           NA
## sold.fctr.predict.All.X.glm.prob                                NA
## sold.fctr.predict.All.X.glmnet.prob                             NA
## sold.fctr.predict.All.X.no.rnorm.rf.prob                        NA
## sold.fctr.predict.All.X.no.rnorm.rpart.prob                     NA
## sold.fctr.predict.Interact.High.cor.Y.glm.prob                  NA
## sold.fctr.predict.Low.cor.X.glm.prob                            NA
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob                NA
## sold.fctr.predict.Max.cor.Y.glm.prob                            NA
## sold.fctr.predict.Max.cor.Y.rpart.prob                          NA
##                                                         Final.glmnet.importance
## sold.fctr.predict.All.X.no.rnorm.Train.rf.prob                        100.00000
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob                99.24746
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob                  0.00000
## sold.fctr.predict.All.Interact.X.bayesglm.prob                               NA
## sold.fctr.predict.All.Interact.X.glm.prob                                    NA
## sold.fctr.predict.All.Interact.X.glmnet.prob                                 NA
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob                            NA
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob                         NA
## sold.fctr.predict.All.X.bayesglm.prob                                        NA
## sold.fctr.predict.All.X.glm.prob                                             NA
## sold.fctr.predict.All.X.glmnet.prob                                          NA
## sold.fctr.predict.All.X.no.rnorm.rf.prob                                     NA
## sold.fctr.predict.All.X.no.rnorm.rpart.prob                                  NA
## sold.fctr.predict.Interact.High.cor.Y.glm.prob                               NA
## sold.fctr.predict.Low.cor.X.glm.prob                                         NA
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob                             NA
## sold.fctr.predict.Max.cor.Y.glm.prob                                         NA
## sold.fctr.predict.Max.cor.Y.rpart.prob                                       NA
# Diagnostic plots for the final model on the training observations.
# Binomial classifiers are plotted at the selected model's OOB-optimal probability
# threshold; all other problem types are plotted without an explicit threshold.
if (!(glb_is_classification && glb_is_binomial)) {
    glb_analytics_diag_plots(obs_df=glb_trnobs_df, mdl_id=glb_fin_mdl_id)
} else {
    glb_analytics_diag_plots(obs_df=glb_trnobs_df, mdl_id=glb_fin_mdl_id,
        prob_threshold=glb_models_df[glb_models_df$model_id == glb_sel_mdl_id,
                                     "opt.prob.threshold.OOB"])
}

## [1] "Min/Max Boundaries: "
##      UniqueID sold.fctr sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob
## 1       10001         N                                             0.228
## 1859    11861         N                                                NA
##      sold.fctr.predict.All.Interact.X.no.rnorm.rf
## 1                                               N
## 1859                                         <NA>
##      sold.fctr.predict.All.X.no.rnorm.rf.prob
## 1                                       0.246
## 1859                                       NA
##      sold.fctr.predict.All.X.no.rnorm.rf
## 1                                      N
## 1859                                <NA>
##      sold.fctr.predict.Max.cor.Y.rpart.prob
## 1                                 0.2115028
## 1859                                     NA
##      sold.fctr.predict.Max.cor.Y.rpart
## 1                                    N
## 1859                              <NA>
##      sold.fctr.predict.All.X.no.rnorm.rpart.prob
## 1                                      0.2115028
## 1859                                          NA
##      sold.fctr.predict.All.X.no.rnorm.rpart
## 1                                         N
## 1859                                   <NA>
##      sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob
## 1                                               0.2115028
## 1859                                                   NA
##      sold.fctr.predict.All.Interact.X.no.rnorm.rpart
## 1                                                  N
## 1859                                            <NA>
##      sold.fctr.predict.All.Interact.X.glmnet.prob
## 1                                       0.2764637
## 1859                                           NA
##      sold.fctr.predict.All.Interact.X.glmnet
## 1                                          N
## 1859                                    <NA>
##      sold.fctr.predict.Max.cor.Y.glm.prob sold.fctr.predict.Max.cor.Y.glm
## 1                               0.2015561                               N
## 1859                                   NA                            <NA>
##      sold.fctr.predict.All.X.glmnet.prob sold.fctr.predict.All.X.glmnet
## 1                              0.2772074                              N
## 1859                                  NA                           <NA>
##      sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob
## 1                                           0.1428571
## 1859                                               NA
##      sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart
## 1                                              N
## 1859                                        <NA>
##      sold.fctr.predict.All.Interact.X.bayesglm.prob
## 1                                          0.299625
## 1859                                             NA
##      sold.fctr.predict.All.Interact.X.bayesglm
## 1                                            N
## 1859                                      <NA>
##      sold.fctr.predict.Interact.High.cor.Y.glm.prob
## 1                                         0.2022016
## 1859                                             NA
##      sold.fctr.predict.Interact.High.cor.Y.glm
## 1                                            N
## 1859                                      <NA>
##      sold.fctr.predict.All.X.bayesglm.prob
## 1                                0.3417333
## 1859                                    NA
##      sold.fctr.predict.All.X.bayesglm sold.fctr.predict.Low.cor.X.glm.prob
## 1                                   N                            0.4592869
## 1859                             <NA>                                   NA
##      sold.fctr.predict.Low.cor.X.glm sold.fctr.predict.All.X.glm.prob
## 1                                  N                         0.388993
## 1859                            <NA>                               NA
##      sold.fctr.predict.All.X.glm sold.fctr.predict.All.Interact.X.glm.prob
## 1                              N                                         1
## 1859                        <NA>                                        NA
##      sold.fctr.predict.All.Interact.X.glm
## 1                                       Y
## 1859                                 <NA>
##      sold.fctr.predict.Ensemble.glmnet.prob
## 1                                        NA
## 1859                              0.6425959
##      sold.fctr.predict.Ensemble.glmnet
## 1                                 <NA>
## 1859                                 Y
##      sold.fctr.predict.Ensemble.glmnet.accurate
## 1                                            NA
## 1859                                       TRUE
##      sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob
## 1                                                      0.274
## 1859                                                   0.090
##      sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf
## 1                                                     N
## 1859                                                  N
##      sold.fctr.predict.All.X.no.rnorm.Train.rf.prob
## 1                                             0.272
## 1859                                          0.078
##      sold.fctr.predict.All.X.no.rnorm.Train.rf
## 1                                            N
## 1859                                         N
##      sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob
## 1                                                 0.2152642
## 1859                                              0.2152642
##      sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart
## 1                                                    N
## 1859                                                 N
##      sold.fctr.predict.Final.glmnet.prob sold.fctr.predict.Final.glmnet
## 1                              0.2195853                              N
## 1859                           0.1039561                              N
##      sold.fctr.predict.Final.glmnet.accurate
## 1                                       TRUE
## 1859                                    TRUE
##      sold.fctr.predict.Final.glmnet.error .label
## 1                                       0  10001
## 1859                                    0  11861
## [1] "Inaccurate: "
##  [1] UniqueID                                               
##  [2] sold.fctr                                              
##  [3] sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob      
##  [4] sold.fctr.predict.All.Interact.X.no.rnorm.rf           
##  [5] sold.fctr.predict.All.X.no.rnorm.rf.prob               
##  [6] sold.fctr.predict.All.X.no.rnorm.rf                    
##  [7] sold.fctr.predict.Max.cor.Y.rpart.prob                 
##  [8] sold.fctr.predict.Max.cor.Y.rpart                      
##  [9] sold.fctr.predict.All.X.no.rnorm.rpart.prob            
## [10] sold.fctr.predict.All.X.no.rnorm.rpart                 
## [11] sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob   
## [12] sold.fctr.predict.All.Interact.X.no.rnorm.rpart        
## [13] sold.fctr.predict.All.Interact.X.glmnet.prob           
## [14] sold.fctr.predict.All.Interact.X.glmnet                
## [15] sold.fctr.predict.Max.cor.Y.glm.prob                   
## [16] sold.fctr.predict.Max.cor.Y.glm                        
## [17] sold.fctr.predict.All.X.glmnet.prob                    
## [18] sold.fctr.predict.All.X.glmnet                         
## [19] sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob       
## [20] sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart            
## [21] sold.fctr.predict.All.Interact.X.bayesglm.prob         
## [22] sold.fctr.predict.All.Interact.X.bayesglm              
## [23] sold.fctr.predict.Interact.High.cor.Y.glm.prob         
## [24] sold.fctr.predict.Interact.High.cor.Y.glm              
## [25] sold.fctr.predict.All.X.bayesglm.prob                  
## [26] sold.fctr.predict.All.X.bayesglm                       
## [27] sold.fctr.predict.Low.cor.X.glm.prob                   
## [28] sold.fctr.predict.Low.cor.X.glm                        
## [29] sold.fctr.predict.All.X.glm.prob                       
## [30] sold.fctr.predict.All.X.glm                            
## [31] sold.fctr.predict.All.Interact.X.glm.prob              
## [32] sold.fctr.predict.All.Interact.X.glm                   
## [33] sold.fctr.predict.Ensemble.glmnet.prob                 
## [34] sold.fctr.predict.Ensemble.glmnet                      
## [35] sold.fctr.predict.Ensemble.glmnet.accurate             
## [36] sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob
## [37] sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf     
## [38] sold.fctr.predict.All.X.no.rnorm.Train.rf.prob         
## [39] sold.fctr.predict.All.X.no.rnorm.Train.rf              
## [40] sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob 
## [41] sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart      
## [42] sold.fctr.predict.Final.glmnet.prob                    
## [43] sold.fctr.predict.Final.glmnet                         
## [44] sold.fctr.predict.Final.glmnet.accurate                
## [45] sold.fctr.predict.Final.glmnet.error                   
## <0 rows> (or 0-length row.names)

# Gather the ids of every feature that has a non-missing value in at least one
# glb_feats_df column whose name contains ".importance".
dsp_feats_vctr <- NULL
for (var in names(glb_feats_df)[grepl(".importance", names(glb_feats_df), fixed=TRUE)]) {
    dsp_feats_vctr <- union(dsp_feats_vctr,
                            glb_feats_df[["id"]][!is.na(glb_feats_df[[var]])])
}

# print(glb_trnobs_df[glb_trnobs_df$UniqueID %in% FN_OOB_ids, 
#                     grep(glb_rsp_var, names(glb_trnobs_df), value=TRUE)])

# Show the columns that exist in glb_trnobs_df but not (yet) in glb_allobs_df.
print(setdiff(names(glb_trnobs_df), names(glb_allobs_df)))
## [1] "sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob"
## [2] "sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf"     
## [3] "sold.fctr.predict.All.X.no.rnorm.Train.rf.prob"         
## [4] "sold.fctr.predict.All.X.no.rnorm.Train.rf"              
## [5] "sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob" 
## [6] "sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart"      
## [7] "sold.fctr.predict.Final.glmnet.prob"                    
## [8] "sold.fctr.predict.Final.glmnet"
# Copy every column found only in glb_trnobs_df into the .src == "Train" rows of
# glb_allobs_df.
# NOTE(review): values are assigned by position, which presumes glb_trnobs_df rows
# are in the same order as the "Train" rows of glb_allobs_df — confirm.
for (col in setdiff(names(glb_trnobs_df), names(glb_allobs_df))) {
    # Merge or cbind ?
    glb_allobs_df[glb_allobs_df$.src == "Train", col] <- glb_trnobs_df[[col]]
}

# Show the columns that exist in glb_fitobs_df but not in glb_allobs_df.
# NOTE(review): unlike the Train and OOB cases, no copy loop follows this check.
print(setdiff(names(glb_fitobs_df), names(glb_allobs_df)))
## character(0)
# Show the columns that exist in glb_OOBobs_df but not in glb_allobs_df.
print(setdiff(names(glb_OOBobs_df), names(glb_allobs_df)))
## character(0)
# Copy every column found only in glb_OOBobs_df into the .lcn == "OOB" rows of
# glb_allobs_df.
# NOTE(review): values are assigned by position, which presumes glb_OOBobs_df rows
# are in the same order as the "OOB" rows of glb_allobs_df — confirm.
for (col in setdiff(names(glb_OOBobs_df), names(glb_allobs_df))) {
    # Merge or cbind ?
    glb_allobs_df[glb_allobs_df$.lcn == "OOB", col] <- glb_OOBobs_df[[col]]
}
    
# Show the columns that exist in glb_newobs_df but not in glb_allobs_df.
# NOTE(review): this is a check only; no copy into glb_allobs_df follows it.
print(setdiff(names(glb_newobs_df), names(glb_allobs_df)))
## character(0)
# When glb_save_envir is set, persist the feature table, the combined observations
# and the model objects (models table, selected and final models and their ids)
# to the file "<glb_out_pfx>dsk.RData".
if (glb_save_envir) {
    save(list=c("glb_feats_df", "glb_allobs_df",
                # "glb_trnobs_df", "glb_fitobs_df", "glb_OOBobs_df", "glb_newobs_df",
                "glb_models_df", "dsp_models_df", "glb_models_lst", "glb_model_type",
                "glb_sel_mdl", "glb_sel_mdl_id",
                "glb_fin_mdl", "glb_fin_mdl_id"),
         file=paste0(glb_out_pfx, "dsk.RData"))
}

# Mark the training-set predictions and the final model as available objects,
# then replay the analytics Petri net with the updated list of fired transitions.
glb_analytics_avl_objs <- c(glb_analytics_avl_objs,
                            "data.training.all.prediction", "model.final")
replay.petrisim(pn=glb_analytics_pn,
                replay.trans=glb_analytics_avl_objs,
                flip_coord=TRUE)
## time trans    "bgn " "fit.data.training.all " "predict.data.new " "end " 
## 0.0000   multiple enabled transitions:  data.training.all data.new model.selected    firing:  data.training.all 
## 1.0000    1   2 1 0 0 
## 1.0000   multiple enabled transitions:  data.training.all data.new model.selected model.final data.training.all.prediction   firing:  data.new 
## 2.0000    2   1 1 1 0 
## 2.0000   multiple enabled transitions:  data.training.all data.new model.selected model.final data.training.all.prediction data.new.prediction   firing:  model.selected 
## 3.0000    3   0 2 1 0 
## 3.0000   multiple enabled transitions:  model.final data.training.all.prediction data.new.prediction     firing:  data.training.all.prediction 
## 4.0000    5   0 1 1 1 
## 4.0000   multiple enabled transitions:  model.final data.training.all.prediction data.new.prediction     firing:  model.final 
## 5.0000    4   0 0 2 1

# Log the start of the "predict.data.new" chunk as a new major step
# (major.inc=TRUE); the printed chunk table shows the previous
# fit.data.training row being closed and a new open row being appended.
glb_chunks_df <- myadd_chunk(glb_chunks_df, "predict.data.new", major.inc=TRUE)
##                label step_major step_minor     bgn     end elapsed
## 15 fit.data.training          8          1 419.196 425.621   6.426
## 16  predict.data.new          9          0 425.622      NA      NA

Step 9.0: predict data new

# Compute final model predictions

# sp_ only
# rsp_var_out <- paste0(glb_rsp_var_out, glb_fin_mdl_id)
# tmp_trnobs_df <- glb_get_predictions(glb_trnobs_df, mdl_id=glb_fin_mdl_id, 
#                                      rsp_var_out=glb_rsp_var_out,
#     prob_threshold_def=ifelse(glb_is_classification && glb_is_binomial, 
#         glb_models_df[glb_models_df$model_id == glb_sel_mdl_id, 
#                       "opt.prob.threshold.OOB"], NULL))
# tmp_newobs_df <- glb_get_predictions(glb_newobs_df, mdl_id=glb_fin_mdl_id, 
#                                      rsp_var_out=glb_rsp_var_out,
#     prob_threshold_def=ifelse(glb_is_classification && glb_is_binomial, 
#         glb_models_df[glb_models_df$model_id == glb_sel_mdl_id, 
#                       "opt.prob.threshold.OOB"], NULL))
# 
# tmp_allobs_df <- orderBy(~UniqueID, 
#                          rbind(tmp_trnobs_df[, c(glb_id_var, glb_rsp_var, rsp_var_out)],
#                                tmp_newobs_df[, c(glb_id_var, glb_rsp_var, rsp_var_out)]))
# names(tmp_allobs_df)[3] <- glb_rsp_var_out
# write.csv(tmp_allobs_df, paste0(glb_out_pfx, "predict.csv"), row.names=FALSE)
###

# Score the new (test) observations with the final model (glb_fin_mdl_id).
# For binomial classification the default probability cutoff handed to
# glb_get_predictions is the selected model's "opt.prob.threshold.OOB";
# for every other problem type NULL is passed.
# A scalar if/else is used instead of ifelse(): ifelse(FALSE, x, NULL) stops with
# "replacement has length zero", so the NULL branch could never actually be taken.
# The trailing [1] keeps the length-1 result (NA when no row matches) that
# ifelse() produced for the TRUE branch.
glb_newobs_df <- glb_get_predictions(glb_newobs_df, mdl_id=glb_fin_mdl_id, 
                                     rsp_var_out=glb_rsp_var_out,
    prob_threshold_def=if (glb_is_classification && glb_is_binomial) {
        glb_models_df[glb_models_df$model_id == glb_sel_mdl_id, 
                      "opt.prob.threshold.OOB"][1]
    } else NULL)
## Warning in glb_get_predictions(glb_newobs_df, mdl_id = glb_fin_mdl_id,
## rsp_var_out = glb_rsp_var_out, : Using default probability threshold: 0.5
# Diagnostic plots for the final model on the new observations.
# Binomial classifiers are plotted at the selected model's OOB-optimal probability
# threshold; all other problem types are plotted without an explicit threshold.
if (!(glb_is_classification && glb_is_binomial)) {
    glb_analytics_diag_plots(obs_df=glb_newobs_df, mdl_id=glb_fin_mdl_id)
} else {
    glb_analytics_diag_plots(obs_df=glb_newobs_df, mdl_id=glb_fin_mdl_id,
        prob_threshold=glb_models_df[glb_models_df$model_id == glb_sel_mdl_id,
                                     "opt.prob.threshold.OOB"])
}
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: Removed 798 rows containing missing values (geom_point).
## Warning: Removed 798 rows containing missing values (geom_point).

## [1] "Min/Max Boundaries: "
##      UniqueID sold.fctr
## 1860    11862      <NA>
## 2657    12659      <NA>
##      sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob
## 1860                                                   0.546
## 2657                                                   0.274
##      sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf
## 1860                                                  N
## 2657                                                  N
##      sold.fctr.predict.All.X.no.rnorm.Train.rf.prob
## 1860                                          0.480
## 2657                                          0.198
##      sold.fctr.predict.All.X.no.rnorm.Train.rf
## 1860                                         N
## 2657                                         N
##      sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob
## 1860                                              0.2152642
## 2657                                              0.2152642
##      sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart
## 1860                                                 N
## 2657                                                 N
##      sold.fctr.predict.Final.glmnet.prob sold.fctr.predict.Final.glmnet
## 1860                           0.4641735                              N
## 2657                           0.1912421                              N
##      sold.fctr.predict.Final.glmnet.accurate
## 1860                                      NA
## 2657                                      NA
##      sold.fctr.predict.Final.glmnet.error .label
## 1860                                    0  11862
## 2657                                    0  12659
## [1] "Inaccurate: "
##      UniqueID sold.fctr
## NA         NA      <NA>
## NA.1       NA      <NA>
## NA.2       NA      <NA>
## NA.3       NA      <NA>
## NA.4       NA      <NA>
## NA.5       NA      <NA>
##      sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob
## NA                                                        NA
## NA.1                                                      NA
## NA.2                                                      NA
## NA.3                                                      NA
## NA.4                                                      NA
## NA.5                                                      NA
##      sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf
## NA                                                 <NA>
## NA.1                                               <NA>
## NA.2                                               <NA>
## NA.3                                               <NA>
## NA.4                                               <NA>
## NA.5                                               <NA>
##      sold.fctr.predict.All.X.no.rnorm.Train.rf.prob
## NA                                               NA
## NA.1                                             NA
## NA.2                                             NA
## NA.3                                             NA
## NA.4                                             NA
## NA.5                                             NA
##      sold.fctr.predict.All.X.no.rnorm.Train.rf
## NA                                        <NA>
## NA.1                                      <NA>
## NA.2                                      <NA>
## NA.3                                      <NA>
## NA.4                                      <NA>
## NA.5                                      <NA>
##      sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob
## NA                                                       NA
## NA.1                                                     NA
## NA.2                                                     NA
## NA.3                                                     NA
## NA.4                                                     NA
## NA.5                                                     NA
##      sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart
## NA                                                <NA>
## NA.1                                              <NA>
## NA.2                                              <NA>
## NA.3                                              <NA>
## NA.4                                              <NA>
## NA.5                                              <NA>
##      sold.fctr.predict.Final.glmnet.prob sold.fctr.predict.Final.glmnet
## NA                                    NA                           <NA>
## NA.1                                  NA                           <NA>
## NA.2                                  NA                           <NA>
## NA.3                                  NA                           <NA>
## NA.4                                  NA                           <NA>
## NA.5                                  NA                           <NA>
##      sold.fctr.predict.Final.glmnet.accurate
## NA                                        NA
## NA.1                                      NA
## NA.2                                      NA
## NA.3                                      NA
## NA.4                                      NA
## NA.5                                      NA
##      sold.fctr.predict.Final.glmnet.error
## NA                                     NA
## NA.1                                   NA
## NA.2                                   NA
## NA.3                                   NA
## NA.4                                   NA
## NA.5                                   NA
##        UniqueID sold.fctr
## NA.214       NA      <NA>
## NA.252       NA      <NA>
## NA.381       NA      <NA>
## NA.541       NA      <NA>
## NA.542       NA      <NA>
## NA.737       NA      <NA>
##        sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob
## NA.214                                                      NA
## NA.252                                                      NA
## NA.381                                                      NA
## NA.541                                                      NA
## NA.542                                                      NA
## NA.737                                                      NA
##        sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf
## NA.214                                               <NA>
## NA.252                                               <NA>
## NA.381                                               <NA>
## NA.541                                               <NA>
## NA.542                                               <NA>
## NA.737                                               <NA>
##        sold.fctr.predict.All.X.no.rnorm.Train.rf.prob
## NA.214                                             NA
## NA.252                                             NA
## NA.381                                             NA
## NA.541                                             NA
## NA.542                                             NA
## NA.737                                             NA
##        sold.fctr.predict.All.X.no.rnorm.Train.rf
## NA.214                                      <NA>
## NA.252                                      <NA>
## NA.381                                      <NA>
## NA.541                                      <NA>
## NA.542                                      <NA>
## NA.737                                      <NA>
##        sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob
## NA.214                                                     NA
## NA.252                                                     NA
## NA.381                                                     NA
## NA.541                                                     NA
## NA.542                                                     NA
## NA.737                                                     NA
##        sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart
## NA.214                                              <NA>
## NA.252                                              <NA>
## NA.381                                              <NA>
## NA.541                                              <NA>
## NA.542                                              <NA>
## NA.737                                              <NA>
##        sold.fctr.predict.Final.glmnet.prob sold.fctr.predict.Final.glmnet
## NA.214                                  NA                           <NA>
## NA.252                                  NA                           <NA>
## NA.381                                  NA                           <NA>
## NA.541                                  NA                           <NA>
## NA.542                                  NA                           <NA>
## NA.737                                  NA                           <NA>
##        sold.fctr.predict.Final.glmnet.accurate
## NA.214                                      NA
## NA.252                                      NA
## NA.381                                      NA
## NA.541                                      NA
## NA.542                                      NA
## NA.737                                      NA
##        sold.fctr.predict.Final.glmnet.error
## NA.214                                   NA
## NA.252                                   NA
## NA.381                                   NA
## NA.541                                   NA
## NA.542                                   NA
## NA.737                                   NA
##        UniqueID sold.fctr
## NA.792       NA      <NA>
## NA.793       NA      <NA>
## NA.794       NA      <NA>
## NA.795       NA      <NA>
## NA.796       NA      <NA>
## NA.797       NA      <NA>
##        sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob
## NA.792                                                      NA
## NA.793                                                      NA
## NA.794                                                      NA
## NA.795                                                      NA
## NA.796                                                      NA
## NA.797                                                      NA
##        sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf
## NA.792                                               <NA>
## NA.793                                               <NA>
## NA.794                                               <NA>
## NA.795                                               <NA>
## NA.796                                               <NA>
## NA.797                                               <NA>
##        sold.fctr.predict.All.X.no.rnorm.Train.rf.prob
## NA.792                                             NA
## NA.793                                             NA
## NA.794                                             NA
## NA.795                                             NA
## NA.796                                             NA
## NA.797                                             NA
##        sold.fctr.predict.All.X.no.rnorm.Train.rf
## NA.792                                      <NA>
## NA.793                                      <NA>
## NA.794                                      <NA>
## NA.795                                      <NA>
## NA.796                                      <NA>
## NA.797                                      <NA>
##        sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob
## NA.792                                                     NA
## NA.793                                                     NA
## NA.794                                                     NA
## NA.795                                                     NA
## NA.796                                                     NA
## NA.797                                                     NA
##        sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart
## NA.792                                              <NA>
## NA.793                                              <NA>
## NA.794                                              <NA>
## NA.795                                              <NA>
## NA.796                                              <NA>
## NA.797                                              <NA>
##        sold.fctr.predict.Final.glmnet.prob sold.fctr.predict.Final.glmnet
## NA.792                                  NA                           <NA>
## NA.793                                  NA                           <NA>
## NA.794                                  NA                           <NA>
## NA.795                                  NA                           <NA>
## NA.796                                  NA                           <NA>
## NA.797                                  NA                           <NA>
##        sold.fctr.predict.Final.glmnet.accurate
## NA.792                                      NA
## NA.793                                      NA
## NA.794                                      NA
## NA.795                                      NA
## NA.796                                      NA
## NA.797                                      NA
##        sold.fctr.predict.Final.glmnet.error
## NA.792                                   NA
## NA.793                                   NA
## NA.794                                   NA
## NA.795                                   NA
## NA.796                                   NA
## NA.797                                   NA
## Warning: Removed 798 rows containing missing values (geom_point).

if (glb_is_classification && glb_is_binomial) {
    # Submission frame: the observation id plus the final model's predicted
    # probability column, renamed to "Probability1".
    submit_df <- glb_newobs_df[, c(glb_id_var, 
                                   paste0(glb_rsp_var_out, glb_fin_mdl_id, ".prob"))]
    names(submit_df)[2] <- "Probability1"
#     submit_df <- glb_newobs_df[, c(paste0(glb_rsp_var_out, glb_fin_mdl_id)), FALSE]
#     names(submit_df)[1] <- "BDscience"
#     submit_df$BDscience <- as.numeric(submit_df$BDscience) - 1
#     #submit_df <-rbind(submit_df, data.frame(bdanalytics=c(" ")))
#     print("Submission Stats:")
#     print(table(submit_df$BDscience, useNA = "ifany"))
    
    # Shift "Probability1" of the row(s) of `df` whose id equals `obs_id` by
    # `delta`, keeping the result inside [0, 0.9999].
    #   df        : submission data frame holding glb_id_var and "Probability1"
    #   obs_id    : id of the observation to adjust
    #   delta     : signed amount added to the probability
    #   chk_grpid : when TRUE, stop unless the observation exists in
    #               glb_allobs_df with a non-NA .grpid
    # Returns the modified copy of `df`; ids absent from `df` leave it unchanged.
    myshift_submit_prob <- function(df, obs_id, delta, chk_grpid = FALSE) {
        if (chk_grpid) {
            grpid <- glb_allobs_df[which(glb_allobs_df[, glb_id_var] == obs_id), 
                                   ".grpid"]
            # A missing id previously surfaced as "argument is of length zero"
            if (length(grpid) == 0)
                stop(sprintf("obs_id %s not found in glb_allobs_df", obs_id))
            if (any(is.na(grpid)))
                stop(".grpid is NA")
        }
        
        # which() drops NA comparisons so they can never reach the assignment
        rows <- which(df[, glb_id_var] == obs_id)
        if (length(rows) > 0)
            # pmin / pmax clamp each matching row separately
            df[rows, "Probability1"] <- 
                pmin(0.9999, pmax(0, df[rows, "Probability1"] + delta))
        return(df)
    }
    
    # Observations whose submitted probability is pushed down by 0.5
    glb_force_prediction_lst <- list()
    glb_force_prediction_lst[["0"]] <- c(11885, 11907, 11943, 
                                         12050, 12115, 12171, 
                                         12253, 12285, 12367, 12388, 12399,
                                         12585)
    for (obs_id in glb_force_prediction_lst[["0"]])
        submit_df <- myshift_submit_prob(submit_df, obs_id, -0.5, chk_grpid = TRUE)
    
    # Push down every "Y" prediction whose startprice exceeds 675.
    # NOTE(review): 675 is presumably the highest startprice among sold
    #   training observations - confirm before reusing with other data.
    rsp_var_out <- paste0(glb_rsp_var_out, glb_fin_mdl_id)
    for (obs_id in glb_newobs_df[!is.na(glb_newobs_df[, rsp_var_out]) & 
                                 (glb_newobs_df[, rsp_var_out] == "Y") & 
                                 (glb_newobs_df[ , "startprice"] > 675), glb_id_var])
        submit_df <- myshift_submit_prob(submit_df, obs_id, -0.5)
    
    # Observations whose submitted probability is pushed up by 0.5
    glb_force_prediction_lst[["1"]] <- c(11871, 11875, 11886, 
                                    11913, 11931, 11937, 11967, 11990, 11991, 11994, 11999,
                                         12000, 12002, 12018, 12021, 12065, 12072, 
                                         12111, 12114, 12126, 12152, 12172,
                                         12213, 12214, 12233, 12265, 12278, 12299, 
                                         12446, 12491, 
                                         12505, 12576, 12608, 12630)
    for (obs_id in glb_force_prediction_lst[["1"]])
        submit_df <- myshift_submit_prob(submit_df, obs_id, +0.5, chk_grpid = TRUE)
    
} else submit_df <- glb_newobs_df[, c(glb_id_var, 
                                   paste0(glb_rsp_var_out, glb_fin_mdl_id))]

if (glb_is_classification) {
    # Cross-check submitted probabilities against duplicate groups.
    # Only new observations with a non-NA .grpid are examined.
    rsp_var_out <- paste0(glb_rsp_var_out, glb_fin_mdl_id)
    tmp_newobs_df <- subset(glb_newobs_df[, c(glb_id_var, ".grpid", rsp_var_out)],
                            !is.na(.grpid))
    tmp_newobs_df <- merge(tmp_newobs_df, dupgrps_df, by=".grpid", all.x=TRUE)
    tmp_newobs_df <- merge(tmp_newobs_df, submit_df, by=glb_id_var, all.x = TRUE)
    
    # A row is flagged when its probability is above 0.5 while sold.0 > 0, or
    # below 0.5 while sold.1 > 0.
    # NOTE(review): sold.0 / sold.1 come from dupgrps_df and are presumably
    #   per-group counts of known outcomes - confirm.
    tmp_newobs_df$.err <- 
        ((tmp_newobs_df$Probability1 > 0.5) & (tmp_newobs_df$sold.0 > 0) |
         (tmp_newobs_df$Probability1 < 0.5) & (tmp_newobs_df$sold.1 > 0))
    # Sort by the configured id column instead of a hard-coded name
    tmp_newobs_df <- orderBy(as.formula(paste0("~", glb_id_var)), 
                             subset(tmp_newobs_df, .err == TRUE))
    print("Prediction errors in duplicates:")
    print(tmp_newobs_df)
    
    if (nrow(tmp_newobs_df) > 0)
        stop("check Prediction errors in duplicates")
    #print(dupobs_df[dupobs_df$.grpid == 26, ])
    
    # No positive prediction may have a startprice above the highest
    # startprice of any observation whose known response is "Y".
    # cbind relies on submit_df having the same row order as glb_newobs_df.
    tmp_newobs_df <- cbind(glb_newobs_df, submit_df[, "Probability1", FALSE])
    max_sold_startprice <- 
        max(glb_allobs_df[!is.na(glb_allobs_df[, glb_rsp_var]) & 
                      (glb_allobs_df[, glb_rsp_var] == "Y"), "startprice"])
    if (max(glb_newobs_df[!is.na(glb_newobs_df[, rsp_var_out]) & 
                      (tmp_newobs_df[, "Probability1"] >= 0.5), "startprice"]) > 
        max_sold_startprice)
        # Report the bound actually used rather than a hard-coded number
        stop(paste0("startprice for some +ve predictions > ", max_sold_startprice))
}
## [1] "Prediction errors in duplicates:"
## [1] UniqueID                       .grpid                        
## [3] sold.fctr.predict.Final.glmnet sold.0                        
## [5] sold.1                         sold.NA                       
## [7] .freq                          Probability1                  
## [9] .err                          
## <0 rows> (or 0-length row.names)
# Derive the submission file name from the output prefix and final model id
# (every "." replaced by "_"), then write submit_df as unquoted CSV without
# row names.
submit_fname <- paste0(chartr(".", "_", paste0(glb_out_pfx, glb_fin_mdl_id)), 
                       "_submit.csv")
write.csv(x = submit_df, file = submit_fname, quote = FALSE, row.names = FALSE)
#cat(" ", "\n", file=submit_fn, append=TRUE)

# print(orderBy(~ -max.auc.OOB, glb_models_df[, c("model_id", 
#             "max.auc.OOB", "max.Accuracy.OOB")]))
for (txt_var in glb_txt_vars) {
    # Per-text-variable diagnostics on the post-stemming term table and its
    # TfIdf matrix.
    tmp_df <- glb_post_stem_words_terms_df_lst[[txt_var]]
    TfIdf_mtrx <- glb_post_stem_words_TfIdf_mtrx_lst[[txt_var]]
    
    # Print post-stem-words but need post-stop-words for debugging ?
    print(sprintf("    All post-stem-words TfIDf terms for %s:", txt_var))
    myprint_df(tmp_df)
    
    # Observations with a positive TfIdf weight for the last term in the table
    print(glb_allobs_df[which(TfIdf_mtrx[, tmp_df$pos[nrow(tmp_df)]] > 0), 
                        c(glb_id_var, glb_txt_vars)])
    
    # Number of terms that occur exactly once
    tmp_freq1_df <- subset(tmp_df, freq == 1)
    print(nrow(tmp_freq1_df))
    #print(glb_allobs_df[which(TfIdf_mtrx[, 207] > 0), c(glb_id_var, glb_txt_vars)])
    #unlist(strsplit(glb_allobs_df[2157, "description"], ""))
    #glb_allobs_df[2442, c(glb_id_var, glb_txt_vars)]
    #TfIdf_mtrx[2442, TfIdf_mtrx[2442, ] > 0]  

    # Frequency-1 terms that contain any of the first top-n terms as a
    # regular-expression (substring) match
    print(sprintf("    Top_n post_stem_words TfIDf terms for %s:", txt_var))
    top_n_vctr <- tmp_df$term[1:glb_txt_top_n[[txt_var]]]
    tmp_freq1_df$top_n <- grepl(paste(top_n_vctr, collapse = "|"), 
                                tmp_freq1_df$term)
    print(tmp_freq1_df[tmp_freq1_df$top_n, ])
}
## [1] "    All post-stem-words TfIDf terms for descr.my:"
##            TfIdf    term freq pos         cor.y    cor.y.abs  TfIdf.N
## condit  207.7156  condit  499 137 -0.0418798096 0.0418798096 82.38883
## use     144.7700     use  291 709  0.0103720246 0.0103720246 51.46753
## scratch 126.4831 scratch  286 565 -0.0088060862 0.0088060862 49.35848
## new     124.1683     new  156 429 -0.0372353149 0.0372353149 50.77429
## good    120.3335    good  197 281 -0.0004368629 0.0004368629 44.58392
## screen  105.7897  screen  213 566  0.0232373651 0.0232373651 36.89203
##          TfIdf.Y TfIdf.NA
## condit  56.35056 68.97623
## use     47.66515 45.63736
## scratch 40.20165 36.92302
## new     30.11628 43.27771
## good    38.21866 37.53088
## screen  37.92265 30.97500
##              TfIdf     term freq pos        cor.y   cor.y.abs   TfIdf.N
## almost   14.463235   almost   12  39  0.009256155 0.009256155 4.5360969
## retail   11.143946   retail    9 551 -0.004221734 0.004221734 3.0874822
## awesom    4.076122   awesom    2  65 -0.021525023 0.021525023 2.5938956
## first     2.939748    first    2 249 -0.021525023 0.021525023 2.0751165
## headphon  2.017474 headphon    2 298 -0.021525023 0.021525023 0.8646319
## therefor  1.137558 therefor    1 670 -0.021525023 0.021525023 1.1375583
##           TfIdf.Y  TfIdf.NA
## almost   5.323590 4.6035482
## retail   2.312503 5.7439603
## awesom   0.000000 1.4822261
## first    0.000000 0.8646319
## headphon 0.000000 1.1528425
## therefor 0.000000 0.0000000
##             TfIdf    term freq pos       cor.y  cor.y.abs   TfIdf.N
## red     0.8125416     red    1 532          NA         NA 0.0000000
## version 0.8125416 version    1 716 -0.02152502 0.02152502 0.8125416
## adaptor 0.7583722 adaptor    1  31  0.02500407 0.02500407 0.0000000
## divid   0.7583722   divid    1 194  0.02500407 0.02500407 0.0000000
## grey    0.7583722    grey    1 286  0.02500407 0.02500407 0.0000000
## hdmi    0.7583722    hdmi    1 297  0.02500407 0.02500407 0.0000000
##           TfIdf.Y  TfIdf.NA
## red     0.0000000 0.8125416
## version 0.0000000 0.0000000
## adaptor 0.7583722 0.0000000
## divid   0.7583722 0.0000000
## grey    0.7583722 0.0000000
## hdmi    0.7583722 0.0000000
##     UniqueID
## 114    10114
##                                                                                                 descr.my
## 114 comes with: grey ipad divider case, white stylus, Apple USB wall charger and USB cord. HDMI adaptor 
## [1] 285
## [1] "    Top_n post_stem_words TfIDf terms for descr.my:"
##              TfIdf      term freq pos       cor.y  cor.y.abs  TfIdf.N
## appli     2.843896     appli    1  54  0.02500407 0.02500407 0.000000
## showroom  1.895930  showroom    1 592          NA         NA 0.000000
## backlit   1.625083   backlit    1  68 -0.02152502 0.02152502 1.625083
## paperwork 1.421948 paperwork    1 464          NA         NA 0.000000
## seen      1.421948      seen    1 575 -0.02152502 0.02152502 1.421948
## backlight 1.263954 backlight    1  67  0.02500407 0.02500407 0.000000
## seem      1.137558      seem    1 574          NA         NA 0.000000
##            TfIdf.Y TfIdf.NA top_n
## appli     2.843896 0.000000  TRUE
## showroom  0.000000 1.895930  TRUE
## backlit   0.000000 0.000000  TRUE
## paperwork 0.000000 1.421948  TRUE
## seen      0.000000 0.000000  TRUE
## backlight 1.263954 0.000000  TRUE
## seem      0.000000 1.137558  TRUE
# For binomial classification, show the selected model's
# opt.prob.threshold.OOB entry from the model summary table.
if (glb_is_classification && glb_is_binomial) {
    print(glb_models_df[["opt.prob.threshold.OOB"]][
        glb_models_df$model_id == glb_sel_mdl_id])
}
## [1] 0.5
# Id of the selected model (the one whose OOB results are reported below)
print(sprintf("glb_sel_mdl_id: %s", glb_sel_mdl_id))
## [1] "glb_sel_mdl_id: Ensemble.glmnet"
# Id of the final model (the one whose predictions populate the submission)
print(sprintf("glb_fin_mdl_id: %s", glb_fin_mdl_id))
## [1] "glb_fin_mdl_id: Final.glmnet"
# Rows and columns of the fit observations data frame
print(dim(glb_fitobs_df))
## [1] 969 201
# Model comparison table prepared earlier for display
print(dsp_models_df)
##                         model_id max.Accuracy.OOB max.auc.OOB
## 18    All.Interact.X.no.rnorm.rf        0.8483146   0.9142644
## 13             All.X.no.rnorm.rf        0.8438202   0.9180131
## 5                Max.cor.Y.rpart        0.8426966   0.8469855
## 12          All.X.no.rnorm.rpart        0.8426966   0.8469855
## 17 All.Interact.X.no.rnorm.rpart        0.8426966   0.8469855
## 16         All.Interact.X.glmnet        0.8359551   0.8742088
## 6                  Max.cor.Y.glm        0.8348315   0.8659702
## 11                  All.X.glmnet        0.8325843   0.8560007
## 4      Max.cor.Y.cv.0.cp.0.rpart        0.8202247   0.8997924
## 15       All.Interact.X.bayesglm        0.8179775   0.8660362
## 7        Interact.High.cor.Y.glm        0.8146067   0.8576352
## 10                All.X.bayesglm        0.7842697   0.8427064
## 8                  Low.cor.X.glm        0.7786517   0.8382546
## 9                      All.X.glm        0.7741573   0.8308232
## 14            All.Interact.X.glm        0.6797753   0.6856640
## 1              MFO.myMFO_classfr        0.5359551   0.5000000
## 3           Max.cor.Y.cv.0.rpart        0.5359551   0.5000000
## 2        Random.myrandom_classfr        0.4640449   0.5185354
##    max.Kappa.OOB min.aic.fit opt.prob.threshold.OOB
## 18     0.6930078          NA                    0.6
## 13     0.6854548          NA                    0.5
## 5      0.6791719          NA                    0.9
## 12     0.6791719          NA                    0.9
## 17     0.6791719          NA                    0.9
## 16     0.6661923          NA                    0.6
## 6      0.6639612    883.4623                    0.7
## 11     0.6580401          NA                    0.7
## 4      0.6403332          NA                    0.3
## 15     0.6319103   1164.3831                    0.5
## 7      0.6240496    887.8417                    0.6
## 10     0.5654496   1056.6761                    0.5
## 8      0.5546405    914.1270                    0.5
## 9      0.5454499    931.5575                    0.5
## 14     0.3658021  14993.8106                    0.9
## 1      0.0000000          NA                    0.5
## 3      0.0000000          NA                    0.5
## 2      0.0000000          NA                    0.4
# Regression-only reporting: OOB RMSE of the selected model, an optional
# per-category error summary, and prediction stats on the new observations
# when their response is fully known.
if (glb_is_regression) {
    print(sprintf("%s OOB RMSE: %0.4f", glb_sel_mdl_id,
                  glb_models_df[glb_models_df$model_id == glb_sel_mdl_id, "min.RMSE.OOB"]))

    if (!is.null(glb_category_var)) {
        # Keep category, response and the prediction-error column; the last
        # column is renamed to "error.abs.OOB".
        tmp_OOBobs_df <- glb_OOBobs_df[, c(glb_category_var, glb_rsp_var,
                                           predct_error_var_name)]
        names(tmp_OOBobs_df)[length(names(tmp_OOBobs_df))] <- "error.abs.OOB"
        # NOTE(review): group_by_ is dplyr's standard-evaluation variant; it is
        #   reportedly deprecated in newer dplyr releases - confirm.
        sOOB_ctgry_df <- dplyr::group_by_(tmp_OOBobs_df, glb_category_var)
        # NOTE(review): `startprice` is referenced by name, but tmp_OOBobs_df
        #   only holds glb_category_var, glb_rsp_var and error.abs.OOB, so this
        #   appears to work only when glb_rsp_var is "startprice" - confirm.
        # NOTE(review): aggregate expressions are passed to dplyr::count, not
        #   summarise - verify this yields the intended per-category totals.
        sOOB_ctgry_df <- dplyr::count(sOOB_ctgry_df, 
                                      startprice.OOB.sum = sum(startprice),
                                        err.abs.OOB.sum = sum(error.abs.OOB),
                                        err.abs.OOB.mean = mean(error.abs.OOB))
        # Positional rename: the fourth column becomes ".n.OOB".
        # NOTE(review): which column is fourth depends on count()'s output
        #   layout - verify it is the row count.
        names(sOOB_ctgry_df)[4] <- ".n.OOB"
        sOOB_ctgry_df <- dplyr::ungroup(sOOB_ctgry_df)
        #intersect(names(glb_ctgry_df), names(sOOB_ctgry_df))
        # Full outer merge on whatever columns the two frames share
        glb_ctgry_df <- merge(glb_ctgry_df, sOOB_ctgry_df, all=TRUE)
        # Categories printed in decreasing order of mean absolute OOB error
        print(orderBy(~-err.abs.OOB.mean, glb_ctgry_df))
    }
    
    # Only when the new observations carry a complete (no NA) response
    if ((glb_rsp_var %in% names(glb_newobs_df)) &&
        !(any(is.na(glb_newobs_df[, glb_rsp_var])))) {
            # Final model is scored, but the summary function / metric settings
            # are taken from glb_sel_mdl.
            pred_stats_df <- 
                mypredict_mdl(mdl=glb_models_lst[[glb_fin_mdl_id]], 
                              df=glb_newobs_df, 
                              rsp_var=glb_rsp_var, 
                              rsp_var_out=glb_rsp_var_out, 
                              model_id_method=glb_fin_mdl_id, 
                              label="new",
                              model_summaryFunction=glb_sel_mdl$control$summaryFunction, 
                              model_metric=glb_sel_mdl$metric,
                              model_metric_maximize=glb_sel_mdl$maximize,
                              ret_type="stats")        
            print(sprintf("%s prediction stats for glb_newobs_df:", glb_fin_mdl_id))
            print(pred_stats_df)
    }    
}

# Classification-only reporting: OOB confusion matrix of the selected model,
# an optional per-category accuracy breakdown, and a confusion matrix on the
# new observations when their response is fully known.
if (glb_is_classification) {
    print(sprintf("%s OOB confusion matrix & accuracy: ", glb_sel_mdl_id))
    # Transposed so that reference classes are rows and predictions columns
    print(t(confusionMatrix(
        data = glb_OOBobs_df[, paste0(glb_rsp_var_out, glb_sel_mdl_id)], 
        reference = glb_OOBobs_df[, glb_rsp_var])$table))

    if (!is.null(glb_category_var)) {
        # Category plus the accuracy flag, the latter renamed "accurate.OOB"
        tmp_OOBobs_df <- glb_OOBobs_df[, c(glb_category_var, predct_accurate_var_name)]
        names(tmp_OOBobs_df)[ncol(tmp_OOBobs_df)] <- "accurate.OOB"
        
        # Cross-tab per category; missing cells count as zero
        aOOB_ctgry_df <- mycreate_xtab_df(tmp_OOBobs_df, names(tmp_OOBobs_df)) 
        aOOB_ctgry_df[is.na(aOOB_ctgry_df)] <- 0
        aOOB_ctgry_df$.n.OOB <- 
            aOOB_ctgry_df$accurate.OOB.FALSE + aOOB_ctgry_df$accurate.OOB.TRUE
        aOOB_ctgry_df$max.accuracy.OOB <- 
            aOOB_ctgry_df$accurate.OOB.TRUE / aOOB_ctgry_df$.n.OOB
        
        #intersect(names(glb_ctgry_df), names(aOOB_ctgry_df))
        glb_ctgry_df <- merge(glb_ctgry_df, aOOB_ctgry_df, all=TRUE)
        # Categories with the most OOB errors first
        print(orderBy(~-accurate.OOB.FALSE, glb_ctgry_df))
        
        # Inaccurately predicted OOB observations of product line "iPadAir"
        print(glb_OOBobs_df[!(glb_OOBobs_df[, predct_accurate_var_name]) & 
                            (glb_OOBobs_df$prdline.my == "iPadAir"), 
                            c(glb_id_var, glb_rsp_var_raw,
                              #"description"
                              "biddable", "startprice", "condition")])
    }
    
    # Only when the new observations carry a complete (no NA) response
    if ((glb_rsp_var %in% names(glb_newobs_df)) &&
        !anyNA(glb_newobs_df[, glb_rsp_var])) {
        print(sprintf("%s new confusion matrix & accuracy: ", glb_fin_mdl_id))
        print(t(confusionMatrix(
            data = glb_newobs_df[, paste0(glb_rsp_var_out, glb_fin_mdl_id)], 
            reference = glb_newobs_df[, glb_rsp_var])$table))
    }    

}    
## [1] "Ensemble.glmnet OOB confusion matrix & accuracy: "
##          Prediction
## Reference   N   Y
##         N 415  62
##         Y  75 338
##    prdl.my.descr.fctr .n.OOB .n.Tst .freqRatio.Tst .freqRatio.OOB
## 5            iPad 2#0     93     83     0.10401003     0.10449438
## 9           iPadAir#0     98     88     0.11027569     0.11011236
## 11      iPadmini 2+#0     71     64     0.08020050     0.07977528
## 6            iPad 2#1     79     71     0.08897243     0.08876404
## 3            iPad 1#0     52     46     0.05764411     0.05842697
## 8           iPad 3+#1     71     64     0.08020050     0.07977528
## 14         iPadmini#1     54     49     0.06140351     0.06067416
## 1           Unknown#0     50     45     0.05639098     0.05617978
## 4            iPad 1#1     48     43     0.05388471     0.05393258
## 7           iPad 3+#0     66     59     0.07393484     0.07415730
## 13         iPadmini#0     73     65     0.08145363     0.08202247
## 2           Unknown#1     47     42     0.05263158     0.05280899
## 10          iPadAir#1     54     49     0.06140351     0.06067416
## 12      iPadmini 2+#1     34     30     0.03759398     0.03820225
##    accurate.OOB.FALSE accurate.OOB.TRUE max.accuracy.OOB
## 5                  20                73        0.7849462
## 9                  16                82        0.8367347
## 11                 14                57        0.8028169
## 6                  11                68        0.8607595
## 3                  10                42        0.8076923
## 8                  10                61        0.8591549
## 14                  9                45        0.8333333
## 1                   8                42        0.8400000
## 4                   8                40        0.8333333
## 7                   8                58        0.8787879
## 13                  8                65        0.8904110
## 2                   7                40        0.8510638
## 10                  4                50        0.9259259
## 12                  4                30        0.8823529
##      UniqueID sold biddable startprice                condition
## 1156    11156    0        1     299.99                      New
## 1562    11563    0        0     300.00                     Used
## 19      10019    1        0     375.00                     Used
## 51      10051    1        0     614.99                      New
## 109     10109    1        0     339.99                      New
## 277     10277    1        0     300.00                     Used
## 297     10297    1        1     490.00                      New
## 535     10535    1        1     380.00                      New
## 1059    11059    1        0     500.00                      New
## 1132    11132    1        0     339.00                     Used
## 1200    11200    1        0     379.99                     Used
## 1212    11212    1        0     450.00                      New
## 1218    11218    1        0     349.99                     Used
## 1353    11354    1        0     300.00                     Used
## 1381    11382    1        0     439.99                      New
## 1604    11605    1        0     229.00 For parts or not working
## 353     10353    1        0     292.50                     Used
## 436     10436    1        0     500.00                     Used
## 675     10675    1        0     280.00                     Used
## 794     10794    1        1     525.00                     Used
# Print OOB diagnostics of the selected model for one value of myCategory:
# the transposed confusion matrix, the share of accurate predictions, and the
# mispredicted observations split by Popular == 1 (reported as FN) and
# Popular == 0 (reported as FP).
#   myCategory : value compared against glb_OOBobs_df$myCategory
# Reads the globals glb_OOBobs_df, glb_sel_mdl_id, glb_rsp_var,
# glb_rsp_var_out, glb_id_var and predct_accurate_var_name.
# NOTE(review): the columns myCategory, Popular, Headline, Snippet, Abstract
#   and .clusterid are hard-coded - confirm they exist in glb_OOBobs_df
#   before calling.
dsp_myCategory_conf_mtrx <- function(myCategory) {
    # Row mask for the requested category, computed once
    in_ctgry <- glb_OOBobs_df$myCategory == myCategory
    
    print(sprintf("%s OOB::myCategory=%s confusion matrix & accuracy: ", 
                  glb_sel_mdl_id, myCategory))
    print(t(confusionMatrix(
        glb_OOBobs_df[in_ctgry, paste0(glb_rsp_var_out, glb_sel_mdl_id)], 
        glb_OOBobs_df[in_ctgry, glb_rsp_var])$table))
    print(sum(glb_OOBobs_df[in_ctgry, predct_accurate_var_name]) / 
         nrow(glb_OOBobs_df[in_ctgry, ]))
    
    # Ids of mispredicted observations in the category. They are matched
    # through glb_id_var, the same column they were read from.
    err_ids <- glb_OOBobs_df[in_ctgry & 
                             (!glb_OOBobs_df[, predct_accurate_var_name]), glb_id_var]
    is_err <- glb_OOBobs_df[, glb_id_var] %in% err_ids
    dsp_cols <- c(".clusterid", "Popular", "Headline", "Snippet", "Abstract")

    OOB_FNerr_df <- glb_OOBobs_df[is_err & (glb_OOBobs_df$Popular == 1), dsp_cols]
    print(sprintf("%s OOB::myCategory=%s FN errors: %d", glb_sel_mdl_id, myCategory,
                  nrow(OOB_FNerr_df)))
    print(OOB_FNerr_df)

    OOB_FPerr_df <- glb_OOBobs_df[is_err & (glb_OOBobs_df$Popular == 0), dsp_cols]
    print(sprintf("%s OOB::myCategory=%s FP errors: %d", glb_sel_mdl_id, myCategory,
                  nrow(OOB_FPerr_df)))
    print(OOB_FPerr_df)
}
#dsp_myCategory_conf_mtrx(myCategory="OpEd#Opinion#")
#dsp_myCategory_conf_mtrx(myCategory="Business#Business Day#Dealbook")
#dsp_myCategory_conf_mtrx(myCategory="##")

# if (glb_is_classification) {
#     print("FN_OOB_ids:")
#     print(glb_OOBobs_df[glb_OOBobs_df$UniqueID %in% FN_OOB_ids, 
#                         grep(glb_rsp_var, names(glb_OOBobs_df), value=TRUE)])
#     print(glb_OOBobs_df[glb_OOBobs_df$UniqueID %in% FN_OOB_ids, 
#                         glb_txt_vars])
#     print(dsp_vctr <- colSums(glb_OOBobs_df[glb_OOBobs_df$UniqueID %in% FN_OOB_ids, 
#                         setdiff(grep("[HSA].", names(glb_OOBobs_df), value=TRUE),
#                                 union(myfind_chr_cols_df(glb_OOBobs_df),
#                     grep(".fctr", names(glb_OOBobs_df), fixed=TRUE, value=TRUE)))]))
# }

# Print response-related columns and non-zero feature sums for observations
# whose Headline.pfx is one of `hdlpfx`.
#   hdlpfx : headline prefix value(s) matched against the Headline.pfx column
# Reads the globals glb_OOBobs_df, glb_newobs_df and glb_rsp_var.
# NOTE(review): Headline.pfx is a hard-coded column name - confirm it exists
#   in both data frames before calling.
dsp_hdlpfx_results <- function(hdlpfx) {
    print(hdlpfx)
    
    # Columns whose name matches glb_rsp_var, for the OOB and new observations
    print(glb_OOBobs_df[glb_OOBobs_df$Headline.pfx %in% c(hdlpfx), 
                        grep(glb_rsp_var, names(glb_OOBobs_df), value=TRUE)])
    new_rows <- glb_newobs_df$Headline.pfx %in% c(hdlpfx)
    print(glb_newobs_df[new_rows, 
                        grep(glb_rsp_var, names(glb_newobs_df), value=TRUE)])
    
    # Columns named like "H.", "S." or "A.", excluding character columns and
    # names containing ".fctr"
    sum_cols <- setdiff(grep("[HSA]\\.", names(glb_newobs_df), value=TRUE),
                        union(myfind_chr_cols_df(glb_newobs_df),
                    grep(".fctr", names(glb_newobs_df), fixed=TRUE, value=TRUE)))
    # drop=FALSE keeps a data frame even for a single column; colSums()
    # rejects a plain vector
    print(dsp_vctr <- colSums(glb_newobs_df[new_rows, sum_cols, drop=FALSE]))
    print(dsp_vctr <- dsp_vctr[dsp_vctr != 0])
    
    # The non-zero-sum columns together with the character columns
    print(glb_newobs_df[new_rows, 
                        union(names(dsp_vctr), myfind_chr_cols_df(glb_newobs_df))])
}
#dsp_hdlpfx_results(hdlpfx="Ask Well::")

# print("myMisc::|OpEd|blank|blank|1:")
# print(glb_OOBobs_df[glb_OOBobs_df$UniqueID %in% c(6446), 
#                     grep(glb_rsp_var, names(glb_OOBobs_df), value=TRUE)])

# print(glb_OOBobs_df[glb_OOBobs_df$UniqueID %in% FN_OOB_ids, 
#                     c("WordCount", "WordCount.log", "myMultimedia",
#                       "NewsDesk", "SectionName", "SubsectionName")])
# print(mycreate_sqlxtab_df(glb_allobs_df[sel_obs(Headline.contains="[Vv]ideo"), ], 
#                           c(glb_rsp_var, "myMultimedia")))
# dsp_chisq.test(Headline.contains="[Vi]deo")
# print(glb_allobs_df[sel_obs(Headline.contains="[Vv]ideo"), 
#                           c(glb_rsp_var, "Popular", "myMultimedia", "Headline")])
# print(glb_allobs_df[sel_obs(Headline.contains="[Ee]bola", Popular=1), 
#                           c(glb_rsp_var, "Popular", "myMultimedia", "Headline",
#                             "NewsDesk", "SectionName", "SubsectionName")])
# print(subset(glb_feats_df, !is.na(importance))[,
#     c("is.ConditionalX.y", 
#       grep("importance", names(glb_feats_df), fixed=TRUE, value=TRUE))])
# print(subset(glb_feats_df, is.ConditionalX.y & is.na(importance))[,
#     c("is.ConditionalX.y", 
#       grep("importance", names(glb_feats_df), fixed=TRUE, value=TRUE))])
# print(subset(glb_feats_df, !is.na(importance))[,
#     c("zeroVar", "nzv", "myNearZV", 
#       grep("importance", names(glb_feats_df), fixed=TRUE, value=TRUE))])
# print(subset(glb_feats_df, is.na(importance))[,
#     c("zeroVar", "nzv", "myNearZV", 
#       grep("importance", names(glb_feats_df), fixed=TRUE, value=TRUE))])
# Print glb_featsimp_df ordered by the "<glb_sel_mdl_id>.importance" column;
# the leading "-" in the orderBy formula requests descending order.
print(orderBy(as.formula(paste0("~ -", glb_sel_mdl_id, ".importance")), glb_featsimp_df))
##                                                         Ensemble.glmnet.importance
## sold.fctr.predict.All.X.no.rnorm.rf.prob                                100.000000
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob                        98.138116
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob                          7.031892
## sold.fctr.predict.All.Interact.X.bayesglm.prob                            4.587832
## sold.fctr.predict.All.Interact.X.glm.prob                                 0.000000
## sold.fctr.predict.All.Interact.X.glmnet.prob                              0.000000
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob                      0.000000
## sold.fctr.predict.All.X.bayesglm.prob                                     0.000000
## sold.fctr.predict.All.X.glm.prob                                          0.000000
## sold.fctr.predict.All.X.glmnet.prob                                       0.000000
## sold.fctr.predict.All.X.no.rnorm.rpart.prob                               0.000000
## sold.fctr.predict.Interact.High.cor.Y.glm.prob                            0.000000
## sold.fctr.predict.Low.cor.X.glm.prob                                      0.000000
## sold.fctr.predict.Max.cor.Y.glm.prob                                      0.000000
## sold.fctr.predict.Max.cor.Y.rpart.prob                                    0.000000
## sold.fctr.predict.All.X.no.rnorm.Train.rf.prob                                  NA
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob                         NA
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob                          NA
##                                                         importance
## sold.fctr.predict.All.X.no.rnorm.rf.prob                        NA
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob               NA
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob                NA
## sold.fctr.predict.All.Interact.X.bayesglm.prob                  NA
## sold.fctr.predict.All.Interact.X.glm.prob                       NA
## sold.fctr.predict.All.Interact.X.glmnet.prob                    NA
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob            NA
## sold.fctr.predict.All.X.bayesglm.prob                           NA
## sold.fctr.predict.All.X.glm.prob                                NA
## sold.fctr.predict.All.X.glmnet.prob                             NA
## sold.fctr.predict.All.X.no.rnorm.rpart.prob                     NA
## sold.fctr.predict.Interact.High.cor.Y.glm.prob                  NA
## sold.fctr.predict.Low.cor.X.glm.prob                            NA
## sold.fctr.predict.Max.cor.Y.glm.prob                            NA
## sold.fctr.predict.Max.cor.Y.rpart.prob                          NA
## sold.fctr.predict.All.X.no.rnorm.Train.rf.prob           100.00000
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob   99.24746
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob     0.00000
##                                                         Final.glmnet.importance
## sold.fctr.predict.All.X.no.rnorm.rf.prob                                     NA
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob                            NA
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob                             NA
## sold.fctr.predict.All.Interact.X.bayesglm.prob                               NA
## sold.fctr.predict.All.Interact.X.glm.prob                                    NA
## sold.fctr.predict.All.Interact.X.glmnet.prob                                 NA
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob                         NA
## sold.fctr.predict.All.X.bayesglm.prob                                        NA
## sold.fctr.predict.All.X.glm.prob                                             NA
## sold.fctr.predict.All.X.glmnet.prob                                          NA
## sold.fctr.predict.All.X.no.rnorm.rpart.prob                                  NA
## sold.fctr.predict.Interact.High.cor.Y.glm.prob                               NA
## sold.fctr.predict.Low.cor.X.glm.prob                                         NA
## sold.fctr.predict.Max.cor.Y.glm.prob                                         NA
## sold.fctr.predict.Max.cor.Y.rpart.prob                                       NA
## sold.fctr.predict.All.X.no.rnorm.Train.rf.prob                        100.00000
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob                99.24746
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob                  0.00000
# Summary of the final model's predictions on glb_newobs_df.
print("glb_newobs_df prediction stats:")
## [1] "glb_newobs_df prediction stats:"
# Histogram of the prediction column, whose name is glb_rsp_var_out followed by
# glb_fin_mdl_id.
print(myplot_histogram(glb_newobs_df, paste0(glb_rsp_var_out, glb_fin_mdl_id)))

# For classification runs, also tabulate the values in the prediction column.
if (glb_is_classification) {
    print(table(glb_newobs_df[, paste0(glb_rsp_var_out, glb_fin_mdl_id)]))
}
## 
##   N   Y 
## 557 241
# players_df <- data.frame(id=c("Chavez", "Giambi", "Menechino", "Myers", "Pena"),
#                          OBP=c(0.338, 0.391, 0.369, 0.313, 0.361),
#                          SLG=c(0.540, 0.450, 0.374, 0.447, 0.500),
#                         cost=c(1400000, 1065000, 295000, 800000, 300000))
# players_df$RS.predict <- predict(glb_models_lst[[csm_mdl_id]], players_df)
# print(orderBy(~ -RS.predict, players_df))

# Report columns that exist in the train / fit / OOB / new data frames but not
# in glb_allobs_df, and copy the train-only and OOB-only columns back into
# glb_allobs_df.
#
# `diff` always holds the most recently computed set of missing column names.
# It masks base::diff only as a variable; calls such as diff(x) still resolve
# to the function.
if (length(diff <- setdiff(names(glb_trnobs_df), names(glb_allobs_df))) > 0)
    print(diff)
for (col in diff) {
    # Merge or cbind ? Values are copied by position, so this relies on the
    # "Train" rows of glb_allobs_df being in the same order as glb_trnobs_df.
    # which() drops NA comparisons; a logical index containing NA is rejected
    # by data-frame subscripted assignment.
    glb_allobs_df[which(glb_allobs_df$.src == "Train"), col] <- glb_trnobs_df[, col]
}

# Fit-only columns are reported but not copied back.
if (length(diff <- setdiff(names(glb_fitobs_df), names(glb_allobs_df))) > 0)
    print(diff)

if (length(diff <- setdiff(names(glb_OOBobs_df), names(glb_allobs_df))) > 0)
    print(diff)
for (col in diff) {
    # Merge or cbind ? Same positional assumption as above, for the "OOB" rows.
    # which() keeps rows whose .lcn is NA out of the assignment index.
    glb_allobs_df[which(glb_allobs_df$.lcn == "OOB"), col] <- glb_OOBobs_df[, col]
}

# New-observation-only columns are reported but not copied back.
if (length(diff <- setdiff(names(glb_newobs_df), names(glb_allobs_df))) > 0)
    print(diff)

# When glb_save_envir is set, write the listed objects to
# "<glb_out_pfx>prdnew_dsk.RData".
if (glb_save_envir) {
    save(list=c("glb_feats_df", "glb_allobs_df",
                #"glb_trnobs_df", "glb_fitobs_df", "glb_OOBobs_df", "glb_newobs_df",
                "glb_models_df", "dsp_models_df", "glb_models_lst", "glb_model_type",
                "glb_sel_mdl", "glb_sel_mdl_id",
                "glb_fin_mdl", "glb_fin_mdl_id"),
         file=paste0(glb_out_pfx, "prdnew_dsk.RData"))
}

# Remove the two temporary data frames from the workspace.
rm(list=c("submit_df", "tmp_OOBobs_df"))

# tmp_replay_lst <- replay.petrisim(pn=glb_analytics_pn, 
#     replay.trans=(glb_analytics_avl_objs <- c(glb_analytics_avl_objs, 
#         "data.new.prediction")), flip_coord=TRUE)
# print(ggplot.petrinet(tmp_replay_lst[["pn"]]) + coord_flip())

# Append the "display.session.info" step to the chunk log kept in glb_chunks_df.
# major.inc=TRUE is passed through to myadd_chunk; the echoed table shows
# step_major moving from 9 to 10, so it presumably starts a new major step
# (confirm against the helper).
glb_chunks_df <- myadd_chunk(glb_chunks_df, "display.session.info", major.inc=TRUE)
##                   label step_major step_minor     bgn     end elapsed
## 16     predict.data.new          9          0 425.622 434.567   8.945
## 17 display.session.info         10          0 434.568      NA      NA

Null Hypothesis (\(\sf{H_{0}}\)): mpg is not impacted by am_fctr.
The variance by am_fctr appears to be independent.

    #{r q1, cache=FALSE}
    # print(t.test(subset(cars_df, am_fctr == "automatic")$mpg,
    #              subset(cars_df, am_fctr == "manual")$mpg,
    #              var.equal=FALSE)$conf)

We reject the null hypothesis, i.e., we have evidence to conclude that am_fctr impacts mpg (95% confidence). Manual transmission is better for miles per gallon than automatic transmission.

##                      label step_major step_minor     bgn     end elapsed
## 11              fit.models          7          1  92.533 236.654 144.122
## 14       fit.data.training          8          0 276.638 419.196 142.558
## 5         extract.features          3          0  17.868  53.117  35.249
## 12              fit.models          7          2 236.655 268.982  32.327
## 10              fit.models          7          0  67.216  92.532  25.317
## 16        predict.data.new          9          0 425.622 434.567   8.945
## 13              fit.models          7          3 268.982 276.637   7.655
## 15       fit.data.training          8          1 419.196 425.621   6.426
## 8          select.features          5          0  60.320  66.001   5.681
## 7      manage.missing.data          4          1  54.646  60.319   5.673
## 2             inspect.data          2          0  12.560  16.480   3.920
## 1              import.data          1          0   8.653  12.560   3.907
## 6             cluster.data          4          0  53.117  54.646   1.529
## 9  partition.data.training          6          0  66.001  67.215   1.214
## 3               scrub.data          2          1  16.480  17.229   0.749
## 4           transform.data          2          2  17.229  17.867   0.638
##    duration
## 11  144.121
## 14  142.558
## 5    35.249
## 12   32.327
## 10   25.316
## 16    8.945
## 13    7.655
## 15    6.425
## 8     5.681
## 7     5.673
## 2     3.920
## 1     3.907
## 6     1.529
## 9     1.214
## 3     0.749
## 4     0.638
## [1] "Total Elapsed Time: 434.567 secs"

## R version 3.2.1 (2015-06-18)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.10.4 (Yosemite)
## 
## locale:
## [1] C/en_US.UTF-8/C/C/C/en_US.UTF-8
## 
## attached base packages:
##  [1] tcltk     grid      parallel  stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] randomForest_4.6-10 glmnet_2.0-2        arm_1.8-6          
##  [4] lme4_1.1-8          Matrix_1.2-2        MASS_7.3-43        
##  [7] rpart.plot_1.5.2    rpart_4.1-10        ROCR_1.0-7         
## [10] gplots_2.17.0       sampling_2.7        entropy_1.2.1      
## [13] dynamicTreeCut_1.62 proxy_0.4-15        tidyr_0.2.0        
## [16] tm_0.6-2            NLP_0.1-8           stringr_1.0.0      
## [19] dplyr_0.4.2         plyr_1.8.3          sqldf_0.4-10       
## [22] RSQLite_1.0.0       DBI_0.3.1           gsubfn_0.6-6       
## [25] proto_0.3-10        reshape2_1.4.1      gdata_2.17.0       
## [28] doMC_1.3.3          iterators_1.0.7     foreach_1.4.2      
## [31] doBy_4.5-13         survival_2.38-3     caret_6.0-52       
## [34] ggplot2_1.0.1       lattice_0.20-33    
## 
## loaded via a namespace (and not attached):
##  [1] splines_3.2.1       gtools_3.5.0        assertthat_0.1     
##  [4] stats4_3.2.1        yaml_2.1.13         slam_0.1-32        
##  [7] quantreg_5.11       chron_2.3-47        digest_0.6.8       
## [10] RColorBrewer_1.1-2  minqa_1.2.4         colorspace_1.2-6   
## [13] htmltools_0.2.6     lpSolve_5.6.11      BradleyTerry2_1.0-6
## [16] SparseM_1.6         scales_0.2.5        brglm_0.5-9        
## [19] mgcv_1.8-7          car_2.0-25          nnet_7.3-10        
## [22] lazyeval_0.1.10     pbkrtest_0.4-2      magrittr_1.5       
## [25] evaluate_0.7        nlme_3.1-121        class_7.3-13       
## [28] tools_3.2.1         formatR_1.2         munsell_0.4.2      
## [31] compiler_3.2.1      e1071_1.6-6         caTools_1.17.1     
## [34] nloptr_1.0.4        bitops_1.0-6        labeling_0.3       
## [37] rmarkdown_0.7       gtable_0.1.2        codetools_0.2-14   
## [40] abind_1.4-3         R6_2.1.0            knitr_1.10.5       
## [43] KernSmooth_2.23-15  stringi_0.5-5       Rcpp_0.12.0        
## [46] coda_0.17-1